Example #1
def makets():
    """
    Build TimeSeries object used in this tutorial
    """
    d = TimeSeries()
    setbasics(d, 1000)
    y = rickerwave(2.0, 0.005)
    ny = len(y)
    for i in range(min(ny, 1000)):
        d.data[i] = y[i]
    return d
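A hedged usage sketch for the helper above; it relies only on attributes used elsewhere on this page and assumes makets() and its helpers (setbasics, rickerwave) are importable from the tutorial module.

# Hypothetical usage sketch: build the tutorial waveform and inspect it.
d = makets()
print(d.npts, d.dt, d.t0)          # basic sampling attributes set by setbasics
print(max(d.data), min(d.data))    # Ricker wavelet samples copied into d.data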
Example #2
def test_PowerSpectrum():
    ts = TimeSeries(100)
    ts.data[0] = 1.0  # delta function - spectrum will be flat
    ts.live = True
    engine = MTPowerSpectrumEngine(100, 5, 10)
    spec = engine.apply(ts)
    assert spec.Nyquist() == spec.f0 + spec.df * spec.nf()

    spec_copy = pickle.loads(pickle.dumps(spec))
    assert spec.df == spec_copy.df
    assert spec.f0 == spec_copy.f0
    assert spec.spectrum_type == spec_copy.spectrum_type
    assert np.allclose(spec.spectrum, spec_copy.spectrum)
Example #3
def test_Ensemble(Ensemble):
    md = Metadata()
    md['double'] = 3.14
    md['bool'] = True
    md['long'] = 7
    es = Ensemble(md, 3)
    if isinstance(es, TimeSeriesEnsemble):
        d = TimeSeries(10)
        d = make_constant_data_ts(d)
        es.member.append(d)
        es.member.append(d)
        es.member.append(d)
    else:
        d = Seismogram(10)
        d = make_constant_data_seis(d)
        es.member.append(d)
        es.member.append(d)
        es.member.append(d)
    es.sync_metadata(['double', 'long'])
    assert es.member[0].is_defined('bool')
    assert es.member[0]['bool'] == True
    assert not es.member[0].is_defined('double')
    assert not es.member[0].is_defined('long')
    es.sync_metadata()
    assert es.member[1].is_defined('double')
    assert es.member[1].is_defined('long')
    assert es.member[1]['double'] == 3.14
    assert es.member[1]['long'] == 7
    es.update_metadata(Metadata({'k': 'v'}))
    assert es['k'] == 'v'
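The make_constant_data_ts helper used above (and its Seismogram twin make_constant_data_seis) is not reproduced on this page.  A minimal sketch consistent with how the tests call it, filling the first nsamp samples with val (default 1.0) starting at t0, might look like the following; the actual signature and defaults in the test suite may differ.

def make_constant_data_ts(d, t0=0.0, dt=0.1, nsamp=5, val=1.0):
    # Hypothetical reimplementation for illustration only.
    d.t0 = t0
    d.dt = dt
    d.npts = nsamp
    d.set_live()          # assumes the set_live method shown in other examples
    for i in range(nsamp):
        d.data[i] = val
    return d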
Example #4
def test_windowdata():
    npts=1000
    ts=TimeSeries()
    setbasics(ts,npts)
    for i in range(npts):
        ts.data[i]=float(i)
    t3c=Seismogram()
    setbasics(t3c,npts)
    for k in range(3):
        for i in range(npts):
            t3c.data[k,i]=100*(k+1)+float(i)
    
    win=TimeWindow(2,3)
    d=WindowData(ts,win)
    print('t y')
    for j in range(d.npts):
        print(d.time(j),d.data[j])
    assert(len(d.data) == 101)
    assert(d.t0==2.0)
    assert(d.endtime() == 3.0)
    d=WindowData(t3c,win)
    print('t x0 x1 x2')
    for j in range(d.npts):
        print(d.time(j),d.data[0,j],d.data[1,j],d.data[2,j])
    assert(d.data.columns() == 101)
    assert(d.t0==2.0)
    assert(d.endtime() == 3.0)
    print('testing error handling')
    t3c.kill()
    d=WindowData(t3c,win)
    assert(d.npts == 1000 and (not d.live))
    d=WindowData(ts,win,preserve_history=True)
    print('Error message posted')
    print(d.elog.get_error_log())
    assert(d.elog.size() == 1)
    # this still throws an error but the message will be different
    d=WindowData(ts,win,preserve_history=True,instance='0')
    print('Error message posted')
    print(d.elog.get_error_log())
    assert(d.elog.size() == 1)
Example #5
def test_map_spark_and_dask():
    l = [get_live_timeseries() for i in range(5)]
    spark_res = spark_map(l)
    dask_res = dask_map(l)

    ts_cp = TimeSeries(l[0])
    res = signals.filter(ts_cp,
                         "bandpass",
                         freqmin=1,
                         freqmax=5,
                         preserve_history=True,
                         instance='0')
    assert np.isclose(spark_res[0].data, ts_cp.data).all()
    assert np.isclose(dask_res[0].data, ts_cp.data).all()
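spark_map and dask_map are helpers defined elsewhere in this test module.  A rough sketch of what the dask variant could look like, assuming the signals.filter wrapper used above and a plain dask bag; the helper name and argument passing are assumptions, not the MsPASS implementation.

import dask.bag

def dask_map(input_list):
    # Hypothetical helper: apply the same bandpass filter to every member
    # of the input list with a dask bag and collect the results.
    ddata = dask.bag.from_sequence(input_list)
    res = ddata.map(signals.filter,
                    "bandpass",
                    freqmin=1,
                    freqmax=5,
                    preserve_history=True,
                    instance='0')
    return res.compute()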
Example #6
def maketsens(d, n=20, moveout=True, moveout_dt=0.05):
    """
    Makes a TimeSeries ensemble of n copies of d.  If moveout is True,
    applies a linear moveout to each member equal to moveout_dt times
    the member's position in the ensemble.
    """
    # If python had templates this would be one because this and the
    # function below are identical except for types
    result = TimeSeriesEnsemble()
    for i in range(n):
        y = TimeSeries(d)  # this makes a required deep copy
        if (moveout):
            y.t0 += float(i) * moveout_dt
        result.member.append(y)
    return result
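Combined with makets() from Example #1, a hedged usage sketch of the ensemble builder:

# Hypothetical usage: expand the tutorial wavelet into a 20 member ensemble
# with a 0.05 s per-member moveout.
d = makets()
ens = maketsens(d, n=20, moveout=True, moveout_dt=0.05)
print(len(ens.member))             # 20
print(ens.member[5].t0 - d.t0)     # 5 * 0.05 = 0.25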
Example #7
def _deepcopy(self, d):
    """
    Private helper method for the method immediately above.   Necessary because
    copy.deepcopy doesn't work with our pybind11 wrappers. There may be a
    fix, but for now we have to use copy constructors specific to each
    object type.
    """
    if isinstance(d, TimeSeries):
        return TimeSeries(d)
    elif isinstance(d, Seismogram):
        return Seismogram(d)
    elif isinstance(d, TimeSeriesEnsemble):
        return TimeSeriesEnsemble(d)
    elif isinstance(d, SeismogramEnsemble):
        return SeismogramEnsemble(d)
    else:
        raise RuntimeError(
            "SeismicPlotter._deepcopy:  received an unsupported data type=",
            type(d))
Example #8
def test_map_spark_and_dask(spark_context):
    l = [get_live_timeseries() for i in range(5)]
    # add net, sta, chan, loc to avoid metadata serialization problem
    for i in range(5):
        l[i]["chan"] = "HHZ"
        l[i]["loc"] = "test_loc"
        l[i]["net"] = "test_net"
        l[i]["sta"] = "test_sta"
    spark_res = spark_map(l, spark_context)
    dask_res = dask_map(l)

    ts_cp = TimeSeries(l[0])
    res = signals.filter(ts_cp,
                         "bandpass",
                         freqmin=1,
                         freqmax=5,
                         object_history=True,
                         alg_id="0")
    assert np.isclose(spark_res[0].data, ts_cp.data).all()
    assert np.isclose(dask_res[0].data, ts_cp.data).all()
Example #9
def make_wavelet_noise_data(nscale=0.1,
                            ns=2048,
                            padlength=512,
                            dt=0.05,
                            npoles=3,
                            corners=[0.08, 0.8]):
    wn = TimeSeries(ns)
    wn.t0 = 0.0
    wn.dt = dt
    wn.tref = TimeReferenceType.Relative
    wn.live = True
    nd = ns + 2 * padlength
    y = nscale * randn(nd)
    sos = signal.butter(npoles,
                        corners,
                        btype='bandpass',
                        output='sos',
                        fs=1.0 / dt)
    y = signal.sosfilt(sos, y)
    for i in range(ns):
        wn.data[i] = y[i + padlength]
    return (wn)
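A hedged usage sketch; note the function itself assumes randn (numpy.random.randn) and scipy's signal module are imported in its module.

# Hypothetical usage: generate band-limited noise and check its sampling.
noise = make_wavelet_noise_data(nscale=0.1, ns=2048, dt=0.05)
print(noise.npts, noise.dt, noise.live)    # 2048  0.05  True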
Example #10
def test_operators():
    d = _CoreTimeSeries(10)
    d1 = make_constant_data_ts(d, nsamp=10)
    dsave = _CoreTimeSeries(d1)
    d = _CoreTimeSeries(6)
    d2 = make_constant_data_ts(d, t0=-0.2, nsamp=6, val=2.0)
    dsave = _CoreTimeSeries(d1)
    d1 += d2
    assert np.allclose(d1.data, [3, 3, 3, 3, 1, 1, 1, 1, 1, 1])
    d1 = _CoreTimeSeries(dsave)
    d = d1 + d2
    assert np.allclose(d.data, [3, 3, 3, 3, 1, 1, 1, 1, 1, 1])
    d1 = _CoreTimeSeries(dsave)
    d1 *= 2.5
    assert np.allclose(d1.data,
                       [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5])
    d3 = TimeSeries(10)
    d4 = TimeSeries(6)
    d3 = make_constant_data_ts(d3, nsamp=10)
    d4 = make_constant_data_ts(d4, t0=-0.2, nsamp=6, val=2.0)
    dsave = _CoreTimeSeries(d3)
    d3 = TimeSeries(dsave)
    d3 += d4
    assert np.allclose(d3.data, [3, 3, 3, 3, 1, 1, 1, 1, 1, 1])
    d3 = TimeSeries(dsave)
    d = d3 + d4
    assert np.allclose(d.data, [3, 3, 3, 3, 1, 1, 1, 1, 1, 1])
    d1 = _CoreTimeSeries(dsave)
    d3 = TimeSeries(dsave)
    d3 *= 2.5
    assert np.allclose(d3.data,
                       [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5])
    x = np.linspace(-0.7, 1.2, 20)
    for t in x:
        d3 = TimeSeries(dsave)
        d4.t0 = t
        d3 += d4
    # These are selected asserts of the incremental test above.
    # Visually d4 moves through d3 as the t0 value advances.  The asserts
    # test the end-member cases: skewed left, inside, and skewed right.
    d3 = TimeSeries(dsave)
    d4.t0 = -0.7  # no overlap test
    d3 += d4
    assert np.allclose(d3.data, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    d3 = TimeSeries(dsave)
    d4.t0 = -0.3  # overlap left
    d3 += d4
    assert np.allclose(d3.data, [3, 3, 3, 1, 1, 1, 1, 1, 1, 1])
    d3 = TimeSeries(dsave)
    d4.t0 = 0.3  # d4 inside d3 test
    d3 += d4
    assert np.allclose(d3.data, [1, 1, 1, 3, 3, 3, 3, 3, 3, 1])
    d3 = TimeSeries(dsave)
    d4.t0 = 0.7  # partial overlap right
    d3 += d4
    assert np.allclose(d3.data, [1, 1, 1, 1, 1, 1, 1, 3, 3, 3])
    d3 = TimeSeries(dsave)
    d4.t0 = 1.0  # no overlap test right
    d3 += d4
    assert np.allclose(d3.data, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    # Repeat the same test for Seismogram objects
    # This section is an edited cut-and-paste of the section above
    # Intentionally do not test _CoreSeismogram directly because
    # currently if it works for Seismogram it will for _CoreSeismogram

    d = _CoreSeismogram(10)
    d1 = make_constant_data_seis(d, nsamp=10)
    dsave = _CoreSeismogram(d1)
    d = _CoreSeismogram(6)
    d2 = make_constant_data_seis(d, t0=-0.2, nsamp=6, val=2.0)
    dsave = _CoreSeismogram(d1)
    d1 += d2
    assert np.allclose(
        d1.data,
        np.array([
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )
    d1 = _CoreSeismogram(dsave)
    d = d1 + d2
    assert np.allclose(
        d.data,
        np.array([
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )
    d1 = _CoreSeismogram(dsave)
    d1 *= 2.5
    assert np.allclose(
        d1.data,
        np.array([
            [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5],
            [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5],
            [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5],
        ]),
    )
    d3 = Seismogram(10)
    d4 = Seismogram(6)
    d3 = make_constant_data_seis(d3, nsamp=10)
    d4 = make_constant_data_seis(d4, t0=-0.2, nsamp=6, val=2.0)
    dsave = Seismogram(d3)
    d3 += d4
    assert np.allclose(
        d3.data,
        np.array([
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )
    d3 = Seismogram(dsave)
    d = d3 + d4
    assert np.allclose(
        d.data,
        np.array([
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )
    d3 = Seismogram(dsave)
    d3 *= 2.5
    assert np.allclose(
        d3.data,
        np.array([
            [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5],
            [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5],
            [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5],
        ]),
    )
    x = np.linspace(-0.7, 1.2, 20)
    for t in x:
        d3 = Seismogram(dsave)
        d4.t0 = t
        d3 += d4

    # These are selected asserts of the incremental test above.
    # Visually d4 moves through d3 as the t0 value advances.  The asserts
    # test the end-member cases: skewed left, inside, and skewed right.
    d3 = Seismogram(dsave)
    d4.t0 = -0.7  # no overlap test
    d3 += d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        ]),
    )

    d3 = Seismogram(dsave)
    d4.t0 = -0.3  # overlap left
    d3 += d4
    assert np.allclose(
        d3.data,
        np.array([
            [3, 3, 3, 1, 1, 1, 1, 1, 1, 1],
            [3, 3, 3, 1, 1, 1, 1, 1, 1, 1],
            [3, 3, 3, 1, 1, 1, 1, 1, 1, 1],
        ]),
    )
    d3 = Seismogram(dsave)
    d4.t0 = 0.3  # d4 inside d3 test
    d3 += d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, 3, 3, 3, 3, 3, 3, 1],
            [1, 1, 1, 3, 3, 3, 3, 3, 3, 1],
            [1, 1, 1, 3, 3, 3, 3, 3, 3, 1],
        ]),
    )
    d3 = Seismogram(dsave)
    d4.t0 = 0.7  # partial overlap right
    d3 += d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, 1, 1, 1, 1, 3, 3, 3],
            [1, 1, 1, 1, 1, 1, 1, 3, 3, 3],
            [1, 1, 1, 1, 1, 1, 1, 3, 3, 3],
        ]),
    )
    d3 = Seismogram(dsave)
    d4.t0 = 1.0  # no overlap test right
    d3 += d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        ]),
    )

    # Repeat exactly the same tests for the - operator but with different
    # numeric results; just omit the *= tests
    d = _CoreTimeSeries(10)
    d1 = make_constant_data_ts(d, nsamp=10)
    dsave = _CoreTimeSeries(d1)
    d = _CoreTimeSeries(6)
    d2 = make_constant_data_ts(d, t0=-0.2, nsamp=6, val=2.0)
    dsave = _CoreTimeSeries(d1)
    d1 -= d2
    assert np.allclose(d1.data, [-1, -1, -1, -1, 1, 1, 1, 1, 1, 1])
    d1 = _CoreTimeSeries(dsave)
    d = d1 - d2
    assert np.allclose(d.data, [-1, -1, -1, -1, 1, 1, 1, 1, 1, 1])
    d3 = TimeSeries(10)
    d4 = TimeSeries(6)
    d3 = make_constant_data_ts(d3, nsamp=10)
    d4 = make_constant_data_ts(d4, t0=-0.2, nsamp=6, val=2.0)
    dsave = _CoreTimeSeries(d3)
    d3 = TimeSeries(dsave)
    d3 -= d4
    assert np.allclose(d3.data, [-1, -1, -1, -1, 1, 1, 1, 1, 1, 1])
    d3 = TimeSeries(dsave)
    d = d3 - d4
    assert np.allclose(d.data, [-1, -1, -1, -1, 1, 1, 1, 1, 1, 1])
    x = np.linspace(-0.7, 1.2, 20)
    for t in x:
        d3 = TimeSeries(dsave)
        d4.t0 = t
        d3 -= d4
    # These are selected asserts of the incremental test above.
    # Visually d4 moves through d3 as the t0 value advances.  The asserts
    # test the end-member cases: skewed left, inside, and skewed right.
    d3 = TimeSeries(dsave)
    d4.t0 = -0.7  # no overlap test
    d3 -= d4
    assert np.allclose(d3.data, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    d3 = TimeSeries(dsave)
    d4.t0 = -0.3  # overlap left
    d3 -= d4
    assert np.allclose(d3.data, [-1, -1, -1, 1, 1, 1, 1, 1, 1, 1])
    d3 = TimeSeries(dsave)
    d4.t0 = 0.3  # d4 inside d3 test
    d3 -= d4
    assert np.allclose(d3.data, [1, 1, 1, -1, -1, -1, -1, -1, -1, 1])
    d3 = TimeSeries(dsave)
    d4.t0 = 0.7  # partial overlap right
    d3 -= d4
    assert np.allclose(d3.data, [1, 1, 1, 1, 1, 1, 1, -1, -1, -1])
    d3 = TimeSeries(dsave)
    d4.t0 = 1.0  # no overlap test right
    d3 -= d4
    assert np.allclose(d3.data, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

    # Repeat the same test for Seismogram objects
    # This section is an edited cut-and-paste of the section above
    # Intentionally do not test _CoreSeismogram directly because
    # currently if it works for Seismogram it will for _CoreSeismogram
    d = _CoreSeismogram(10)
    d1 = make_constant_data_seis(d, nsamp=10)
    dsave = _CoreSeismogram(d1)
    d = _CoreSeismogram(6)
    d2 = make_constant_data_seis(d, t0=-0.2, nsamp=6, val=2.0)
    dsave = _CoreSeismogram(d1)
    d1 -= d2
    assert np.allclose(
        d1.data,
        np.array([
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )
    d1 = _CoreSeismogram(dsave)
    d = d1 - d2
    assert np.allclose(
        d.data,
        np.array([
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )

    d3 = Seismogram(10)
    d4 = Seismogram(6)
    d3 = make_constant_data_seis(d3, nsamp=10)
    d4 = make_constant_data_seis(d4, t0=-0.2, nsamp=6, val=2.0)
    dsave = Seismogram(d3)
    d3 -= d4
    assert np.allclose(
        d3.data,
        np.array([
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )
    d3 = Seismogram(dsave)
    d = d3 - d4
    assert np.allclose(
        d.data,
        np.array([
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )

    x = np.linspace(-0.7, 1.2, 20)
    for t in x:
        d3 = Seismogram(dsave)
        d4.t0 = t
        d3 -= d4

    # These are selected asserts of the incremental test above.
    # Visually d4 moves through d3 as the t0 value advances.  The asserts
    # test the end-member cases: skewed left, inside, and skewed right.
    d3 = Seismogram(dsave)
    d4.t0 = -0.7  # no overlap test
    d3 -= d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        ]),
    )
    d3 = Seismogram(dsave)
    d4.t0 = -0.3  # overlap left
    d3 -= d4
    assert np.allclose(
        d3.data,
        np.array([
            [-1, -1, -1, 1, 1, 1, 1, 1, 1, 1],
            [-1, -1, -1, 1, 1, 1, 1, 1, 1, 1],
            [-1, -1, -1, 1, 1, 1, 1, 1, 1, 1],
        ]),
    )
    d3 = Seismogram(dsave)
    d4.t0 = 0.3  # d4 inside d3 test
    d3 -= d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, -1, -1, -1, -1, -1, -1, 1],
            [1, 1, 1, -1, -1, -1, -1, -1, -1, 1],
            [1, 1, 1, -1, -1, -1, -1, -1, -1, 1],
        ]),
    )
    d3 = Seismogram(dsave)
    d4.t0 = 0.7  # partial overlap right
    d3 -= d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, 1, 1, 1, 1, -1, -1, -1],
            [1, 1, 1, 1, 1, 1, 1, -1, -1, -1],
            [1, 1, 1, 1, 1, 1, 1, -1, -1, -1],
        ]),
    )
    d3 = Seismogram(dsave)
    d4.t0 = 1.0  # no overlap test right
    d3 -= d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        ]),
    )
Example #11
def test_Ensemble(Ensemble):
    md = Metadata()
    md["double"] = 3.14
    md["bool"] = True
    md["long"] = 7
    es = Ensemble(md, 3)
    if isinstance(es, TimeSeriesEnsemble):
        d = TimeSeries(10)
        d = make_constant_data_ts(d)
        es.member.append(d)
        es.member.append(d)
        es.member.append(d)
    else:
        d = Seismogram(10)
        d = make_constant_data_seis(d)
        es.member.append(d)
        es.member.append(d)
        es.member.append(d)
    # set_live is a new method for LoggingEnsemble needed because default is dead
    es.set_live()
    es.sync_metadata(["double", "long"])
    assert es.member[0].is_defined("bool")
    assert es.member[0]["bool"] == True
    assert not es.member[0].is_defined("double")
    assert not es.member[0].is_defined("long")
    es.sync_metadata()
    assert es.member[1].is_defined("double")
    assert es.member[1].is_defined("long")
    assert es.member[1]["double"] == 3.14
    assert es.member[1]["long"] == 7
    es.update_metadata(Metadata({"k": "v"}))
    assert es["k"] == "v"
    # From here on we test features not in CoreEnsemble but only in
    # LoggingEnsemble.   Note that we use pybind11 aliasing to
    # define TimeSeriesEnsemble == LoggingEnsemble<TimeSeries> and
    # SeismogramEnsemble == LoggingEnsemble<Seismogram>.
    # Should be initially marked live
    assert es.live()
    es.elog.log_error("test_ensemble", "test complaint",
                      ErrorSeverity.Complaint)
    es.elog.log_error("test_ensemble", "test invalid", ErrorSeverity.Invalid)
    assert es.elog.size() == 2
    assert es.live()
    es.kill()
    assert es.dead()
    # resurrect es
    es.set_live()
    assert es.live()
    # validate checks for any live members - this tests that feature
    assert es.validate()
    # need this temporary copy for the next test
    if isinstance(es, TimeSeriesEnsemble):
        escopy = TimeSeriesEnsemble(es)
    else:
        escopy = SeismogramEnsemble(es)
    for d in escopy.member:
        d.kill()
    assert not escopy.validate()
    # Reuse escopy for pickle test
    escopy = pickle.loads(pickle.dumps(es))
    assert escopy.is_defined("bool")
    assert escopy["bool"] == True
    assert escopy.is_defined("double")
    assert escopy.is_defined("long")
    assert escopy["double"] == 3.14
    assert escopy["long"] == 7
    assert escopy.live()
    assert escopy.elog.size() == 2
    assert escopy.member[0].is_defined("bool")
    assert escopy.member[0]["bool"] == True
    assert escopy.member[0].is_defined("double")
    assert escopy.member[0].is_defined("long")
    assert es.member[1].is_defined("double")
    assert es.member[1].is_defined("long")
    assert es.member[1]["double"] == 3.14
    assert es.member[1]["long"] == 7
    if isinstance(es, TimeSeriesEnsemble):
        assert es.member[1].data == escopy.member[1].data
    else:
        assert (es.member[1].data[:] == escopy.member[1].data[:]).all()
Example #12
def test_TimeSeries():
    ts = TimeSeries()
    ts.npts = 100
    ts.t0 = 0.0
    ts.dt = 0.001
    ts.live = 1
    ts.tref = TimeReferenceType.Relative
    ts.data.append(1.0)
    ts.data.append(2.0)
    ts.data.append(3.0)
    ts.data.append(4.0)
    ts.sync_npts()
    assert ts.npts == 104
    assert ts.npts == ts["npts"]
    ts += ts
    for i in range(4):
        ts.data[i] = i * 0.5
    ts_copy = pickle.loads(pickle.dumps(ts))
    assert ts.data == ts_copy.data
    assert ts.data[3] == 1.5
    assert ts.data[103] == 8
    assert ts.time(100) == 0.1
    assert ts.sample_number(0.0998) == 100
Example #13
def test_scale():
    dts=_CoreTimeSeries(9)
    setbasics(dts,9)
    d3c=_CoreSeismogram(5)
    setbasics(d3c,5)
    dts.data[0]=3.0
    dts.data[1]=2.0
    dts.data[2]=-4.0
    dts.data[3]=1.0
    dts.data[4]=-100.0
    dts.data[5]=-1.0
    dts.data[6]=5.0
    dts.data[7]=1.0
    dts.data[8]=-6.0
    # MAD of the data above should be 3
    # perf of 0.8 should be 6
    # rms metric should be just over 100 (100.46 when rounded to 2 digits)
    print('Starting tests for time series data of amplitude functions')
    ampmad=MADAmplitude(dts)
    print('MAD amplitude estimate=',ampmad)
    assert(ampmad==3.0)
    amprms=RMSAmplitude(dts)
    print('RMS amplitude estimate=',amprms)
    assert(round(amprms,2)==100.46)
    amppeak=PeakAmplitude(dts)
    ampperf80=PerfAmplitude(dts,0.8)
    print('Peak amplitude=',amppeak)
    print('80% clip level amplitude=',ampperf80)
    assert(amppeak==100.0)
    assert(ampperf80==6.0)
    print('Starting comparable tests for 3c data')
    d3c.data[0,0]=3.0
    d3c.data[0,1]=2.0
    d3c.data[1,2]=-4.0
    d3c.data[2,3]=1.0
    d3c.data[0,4]=np.sqrt(2)*(100.0)
    d3c.data[1,4]=-np.sqrt(2)*(100.0)
    ampmad=MADAmplitude(d3c)
    print('MAD amplitude estimate=',ampmad)
    amprms=RMSAmplitude(d3c)
    print('RMS amplitude estimate=',amprms)
    amppeak=PeakAmplitude(d3c)
    ampperf60=PerfAmplitude(d3c,0.6)
    print('Peak amplitude=',amppeak)
    print('60% clip level amplitude=',ampperf60)
    assert(amppeak==200.0)
    assert(ampperf60==4.0)
    assert(ampmad==3.0)
    amptest=round(amprms,2)
    assert(amptest==89.48)
    print('Trying scaling functions for TimeSeries')
    # we need a deep copy here since scaling changes the data
    d2=TimeSeries(dts)
    amp=_scale(d2,ScalingMethod.Peak,1.0)
    print('Computed peak amplitude=',amp)
    print(d2.data)
    d2=TimeSeries(dts)
    amp=_scale(d2,ScalingMethod.Peak,10.0)
    print('Computed peak amplitude with peak set to 10=',amp)
    print(d2.data)
    assert(amp==100.0)
    assert(d2.data[4]==-10.0)
    print('verifying scale has modified and set calib correctly')
    calib=d2.get_double('calib')
    assert(calib==10.0)
    d2=TimeSeries(dts)
    d2.put('calib',6.0)
    print('test 2 with MAD metric and initial calib of 6')
    amp=_scale(d2,ScalingMethod.MAD,1.0)
    calib=d2.get_double('calib')
    print('New calib value set=',calib)
    assert(calib==18.0)
    print('Testing 3C scale functions')
    d=Seismogram(d3c)
    amp=_scale(d,ScalingMethod.Peak,1.0)
    print('Peak amplitude returned by scale function=',amp)
    calib=d.get_double('calib')
    print('Calib value retrieved (assumed initial 1.0)=',calib)
    print('Testing python scale function wrapper - first on a TimeSeries with defaults')
    d2=TimeSeries(dts)
    amp=scale(d2)
    print('peak amplitude returned =',amp[0])
    assert(amp[0]==100.0)
    d=Seismogram(d3c)
    amp=scale(d)
    print('peak amplitude returned test Seismogram=',amp[0])
    assert(amp[0]==200.0)
    print('starting tests of scale on ensembles')
    print('first test TimeSeriesEnsemble with 5 scaled copies of same vector used earlier in this test')
    ens=TimeSeriesEnsemble()
    scls=[2.0,4.0,1.0,10.0,5.0]  # note 4 is the median of this vector
    npts=dts.npts
    for i in range(5):
        d=TimeSeries(dts)
        for k in range(npts):
            d.data[k]*=scls[i]
        d.put('calib',1.0)
        ens.member.append(d)

    # work on a copy because scaling alters data in place
    enscpy=TimeSeriesEnsemble(ens)
    amps=scale(enscpy)
    print('returned amplitudes for members scaled individually')
    for i in range(5):
        print(amps[i])
        assert(amps[i]==100.0*scls[i])
    enscpy=TimeSeriesEnsemble(ens)
    amp=scale(enscpy,scale_by_section=True)
    print('average amplitude=',amp[0])
    #assert(amp[0]==4.0)
    avgamp=amp[0]
    for i in range(5):
        calib=enscpy.member[i].get_double("calib")
        print('member number ',i,' calib is ',calib)
        assert(round(calib)==400.0)
        #print(enscpy.member[i].data)

    # similar test for SeismogramEnsemble
    npts=d3c.npts
    ens=SeismogramEnsemble()
    for i in range(5):
        d=Seismogram(d3c)
        for k in range(3):
            for j in range(npts):
                d.data[k,j]*=scls[i]
        d.put('calib',1.0)
        ens.member.append(d)
    print('Running comparable tests on SeismogramEnsemble')
    enscpy=SeismogramEnsemble(ens)
    amps=scale(enscpy)
    print('returned amplitudes for members scaled individually')
    for i in range(5):
        print(amps[i])
        assert(round(amps[i])==round(200.0*scls[i]))
    print('Trying section scaling of same data')
    enscpy=SeismogramEnsemble(ens)
    amp=scale(enscpy,scale_by_section=True)
    print('average amplitude=',amp[0])
    assert(round(amp[0])==800.0)
    avgamp=amp[0]
    for i in range(5):
        calib=enscpy.member[i].get_double("calib")
        print('member number ',i,' calib is ',calib)
        assert(round(calib)==800.0)
Example #14
def setup_function(function):
    ts_size = 255
    sampling_rate = 20.0

    function.dict1 = {
        "network": "IU",
        "station": "ANMO",
        "starttime": obspy.UTCDateTime(2019, 12, 31, 23, 59, 59, 915000),
        "npts": ts_size,
        "sampling_rate": sampling_rate,
        "channel": "BHE",
        "live": True,
        "_id": bson.objectid.ObjectId(),
        "jdate": obspy.UTCDateTime(2019, 12, 31, 23, 59, 59, 915000),
        "date_str": obspy.UTCDateTime(2019, 12, 31, 23, 59, 59, 915000),
        "not_defined_date": obspy.UTCDateTime(2019, 12, 31, 23, 59, 59,
                                              915000),
    }
    function.dict2 = {
        "network": "IU",
        "station": "ANMO",
        "starttime": obspy.UTCDateTime(2019, 12, 31, 23, 59, 59, 915000),
        "npts": ts_size,
        "sampling_rate": sampling_rate,
        "channel": "BHN",
    }
    function.dict3 = {
        "network": "IU",
        "station": "ANMO",
        "starttime": obspy.UTCDateTime(2019, 12, 31, 23, 59, 59, 915000),
        "npts": ts_size,
        "sampling_rate": sampling_rate,
        "channel": "BHZ",
    }
    function.tr1 = obspy.Trace(data=np.random.randint(0, 1000, ts_size),
                               header=function.dict1)
    function.tr2 = obspy.Trace(data=np.random.randint(0, 1000, ts_size),
                               header=function.dict2)
    function.tr3 = obspy.Trace(data=np.random.randint(0, 1000, ts_size),
                               header=function.dict3)
    function.stream = obspy.Stream(
        traces=[function.tr1, function.tr2, function.tr3])

    function.md1 = Metadata()
    function.md1.put("network", "IU")
    function.md1.put("npts", ts_size)
    function.md1.put("sampling_rate", sampling_rate)
    function.md1.put("live", True)

    function.ts1 = TimeSeries()
    function.ts1.data = DoubleVector(np.random.rand(ts_size))
    function.ts1.live = True
    function.ts1.dt = 1 / sampling_rate
    function.ts1.t0 = 0
    function.ts1.npts = ts_size
    # TODO: need to bind the constructor that can do TimeSeries(md1)
    function.ts1.put("net", "IU")
    function.ts1.put("npts", ts_size)
    function.ts1.put("sampling_rate", sampling_rate)

    function.seismogram = Seismogram()
    # TODO: the default of seismogram.tref is UTC which is inconsistent with the default
    # for TimeSeries()
    # TODO: It would be nice to have dmatrix support numpy.ndarray as input
    function.seismogram.data = dmatrix(3, ts_size)
    for i in range(3):
        for j in range(ts_size):
            function.seismogram.data[i, j] = np.random.rand()

    function.seismogram.live = True
    function.seismogram.dt = 1 / sampling_rate
    function.seismogram.t0 = 0
    function.seismogram.npts = ts_size
    # FIXME: if the following key is network, the Seismogram2Stream will error out
    # when calling TimeSeries2Trace internally due to the issue when mdef.is_defined(k)
    # returns True but k is an alias, the mdef.type(k) will error out.
    function.seismogram.put("net", "IU")
    function.seismogram.put("npts", ts_size)
    function.seismogram.put("sampling_rate", sampling_rate)
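With pytest, setup_function(function) runs before each test in the module and the objects above are attached to the test function object itself.  A hedged sketch of a companion test that consumes them (the test name is illustrative, not from the suite):

def test_setup_attributes():
    # Hypothetical test: pytest calls setup_function(test_setup_attributes)
    # first, so the objects built above are attributes of this function.
    assert len(test_setup_attributes.stream) == 3
    assert test_setup_attributes.ts1.npts == 255
    assert test_setup_attributes.seismogram.live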
Example #15
def make_simulation_wavelet(n=100,
                            dt=0.05,
                            t0=-1.0,
                            imp=(20.0, -15.0, 4.0, -1.0),
                            lag=(20, 24, 35, 45),
                            npoles=3,
                            corners=[2.0, 6.0]):
    dvec = make_impulse_vector(lag, imp, n)
    fsampling = int(1.0 / dt)
    sos = signal.butter(npoles,
                        corners,
                        btype='bandpass',
                        output='sos',
                        fs=fsampling)
    f = signal.sosfilt(sos, dvec)
    wavelet = TimeSeries(n)
    wavelet.set_t0(t0)
    wavelet.set_dt(dt)
    # This isn't necessary at the moment because relative is the default
    #wavelet.set_tref(TimeReferenceType.Relative)
    wavelet.set_npts(n)
    wavelet.set_live()
    for i in range(n):
        wavelet.data[i] = f[i]
    return wavelet
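A hedged sketch combining this wavelet with the band-limited noise from Example #9; both return TimeSeries objects with matching sampling when built with the same n/ns and dt.

# Hypothetical usage: build the synthetic source wavelet and superimpose
# noise from make_wavelet_noise_data (Example #9), sample by sample.
w = make_simulation_wavelet(n=100, dt=0.05, t0=-1.0)
noise = make_wavelet_noise_data(nscale=0.05, ns=100, dt=0.05)
for i in range(w.npts):
    w.data[i] += noise.data[i]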
Example #16
def get_live_timeseries():
    ts = TimeSeries()
    ts.set_live()
    ts.dt = 1 / sampling_rate
    ts.npts = ts_size
    # ts.put('net', 'IU')
    ts.put('npts', ts_size)
    ts.put('sampling_rate', sampling_rate)
    ts.tref = TimeReferenceType.UTC
    ts.t0 = datetime.utcnow().timestamp()
    ts['delta'] = 0.1
    ts['calib'] = 0.1
    ts['site_id'] = bson.objectid.ObjectId()
    ts['channel_id'] = bson.objectid.ObjectId()
    ts['source_id'] = bson.objectid.ObjectId()
    ts.set_as_origin('test', '0', '0', AtomicType.TIMESERIES)
    ts.data = DoubleVector(np.random.rand(ts_size))
    return ts
Example #17
def get_sin_timeseries():
    ts = TimeSeries()
    ts.set_live()
    ts.dt = 1 / sampling_rate
    ts.npts = ts_size
    # ts.put('net', 'IU')
    ts.put("npts", ts_size)
    ts.put("sampling_rate", sampling_rate)
    ts.tref = TimeReferenceType.UTC
    ts.t0 = datetime.utcnow().timestamp()
    ts["delta"] = 0.1
    ts["calib"] = 0.1
    ts["site_id"] = bson.objectid.ObjectId()
    ts["channel_id"] = bson.objectid.ObjectId()
    ts["source_id"] = bson.objectid.ObjectId()
    ts.set_as_origin("test", "0", "0", AtomicType.TIMESERIES)
    curve = np.linspace(0, 2 * np.pi, ts.npts)
    curve = np.sin(curve) + 0.2 * np.sin(10 * curve)
    ts.data = DoubleVector(curve)
    return ts
Example #18

def read_data(d):
    di = d.get_string("dir")
    dfile = d.get_string("dfile")
    foff = d.get("foff")
    fname = os.path.join(di, dfile)
    with open(fname, mode="rb") as fh:
        fh.seek(foff)
        float_array = array("d")
        float_array.frombytes(fh.read(d.get("nofbytes")))
        d.data = DoubleVector(float_array)


if __name__ == "__main__":
    s = TimeSeries()
    s.data = DoubleVector(np.random.rand(255))
    s["dir"] = "./"
    s["dfile"] = "test_op"
    save_data(s)

    s2 = TimeSeries()
    for k in s:
        s2[k] = s[k]
    s2.data = DoubleVector([])
    print(len(s2.data))
    read_data(s2)
    print(len(s2.data))
    assert all(a == b for a, b in zip(s.data, s2.data))
    # client = MongoClient('localhost', 27017)
    # db = client.mspass
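save_data, called in the __main__ block above, is not shown on this page.  A hypothetical complement to read_data, appending the sample vector as raw doubles (native byte order, matching array.frombytes above) and recording dir, dfile, foff, and nofbytes, could look like this:

def save_data(d):
    # Hypothetical writer matching read_data above (illustrative only):
    # append the samples as raw doubles and record where they were written.
    di = d.get_string("dir")
    dfile = d.get_string("dfile")
    fname = os.path.join(di, dfile)
    with open(fname, mode="ab") as fh:
        foff = fh.seek(0, 2)          # byte offset where this vector starts
        float_array = array("d", d.data)
        fh.write(float_array.tobytes())
    d["foff"] = foff
    d["nofbytes"] = len(float_array) * float_array.itemsize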
Example #19
def Trace2TimeSeries(trace, history=None):
    """
    Convert an obspy Trace object to a TimeSeries object.

    An obspy Trace object mostly maps directly into the mspass TimeSeries
    object with the stats of Trace mapping (almost) directly to the TimeSeries
    Metadata object that is a base class to TimeSeries.  A deep copy of the
    data vector in the original Trace is made to the result. That copy is
    done in C++ for speed (we found a 100+ fold speedup using that mechanism
    instead of a simple python loop).  There is one important type collision
    in copying obspy starttime and endtime stats fields.  obspy uses their
    UTCDateTime object to hold time but TimeSeries only supports an epoch
    time (UTCDateTime.timestamp) so the code here has to convert from the
    UTCDateTime to epoch time in the TimeSeries.  Note in a TimeSeries
    starttime is the t0 attribute.

    The biggest mismatch in Trace and TimeSeries is that Trace has no concept
    of object level history as used in mspass.   That history must be maintained
    outside obspy.  To maintain full history the user must pass the
    history maintained externally through the optional history parameter.
    The contents of history will be loaded directly into the result with
    no sanity checks.

    :param trace: obspy trace object to convert
    :type trace: :class:`~obspy.core.trace.Trace`
    :param history:  mspass ProcessingHistory object to post to result.
    :return: TimeSeries object derived from obspy input Trace object
    :rtype: :class:`~mspasspy.ccore.TimeSeries`
    """
    # The obspy trace object stats attribute only acts like a dictionary
    # we can't use it directly but this trick simplifies the copy to
    # mesh with py::dict for pybind11 - needed in TimeSeries constructor below
    h = dict(trace.stats)
    # These tests are excessively paranoid since starttime and endtime
    # are required attributes in Trace, but better safe in case
    # someone creates one outside obspy
    if Keywords.starttime in trace.stats:
        t = h[Keywords.starttime]
        h[Keywords.starttime] = t.timestamp
    else:
        # We have to set this to something if it isn't set or
        # the TimeSeries constructor may abort
        h[Keywords.starttime] = 0.0
    # we don't require endtime in TimeSeries so ignore if it is not set
    if "endtime" in trace.stats:
        t = h["endtime"]
        h["endtime"] = t.timestamp
    #
    # these define a map of aliases to apply when we convert to mspass
    # metadata from trace - we redefined these names but others could
    # surface as obspy evolves independently from mspass
    mspass_aliases = dict()
    mspass_aliases["station"] = Keywords.sta
    mspass_aliases["network"] = Keywords.net
    mspass_aliases["location"] = Keywords.loc
    mspass_aliases["channel"] = Keywords.chan
    for k in mspass_aliases:
        if k in h:
            x = h.pop(k)
            alias_key = mspass_aliases[k]
            h[alias_key] = x
    dout = TimeSeries(h, trace.data)
    if history is not None:
        dout.load_history(history)
    dout.set_live()
    # The following dead_mspass attribute is used by our decorator API
    # to determine whether an object was dead before the conversion.
    try:
        if trace.dead_mspass:
            dout.live = False
    except AttributeError:
        pass
    return dout
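A hedged usage sketch for the converter above, building a Trace the same way Example #14 does.  The exact Metadata keys produced (e.g. sta, net) depend on the Keywords definitions, so treat the printed keys as assumptions.

import numpy as np
import obspy

# Hypothetical round trip: wrap random samples in an obspy Trace and convert.
hdr = {"network": "IU", "station": "ANMO", "channel": "BHZ",
       "sampling_rate": 20.0,
       "starttime": obspy.UTCDateTime(2019, 12, 31, 23, 59, 59, 915000)}
tr = obspy.Trace(data=np.random.rand(255), header=hdr)
ts = Trace2TimeSeries(tr)
print(ts.npts, ts.live)        # 255 True
print(ts["net"], ts["sta"])    # assumed alias keys set by mspass_aliases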
Example #20
def Seismogram2Stream(sg,
                      chanmap=["E", "N", "Z"],
                      hang=[90.0, 0.0, 0.0],
                      vang=[90.0, 90.0, 0.0]):
    # fixme hang and vang parameters
    """
    Convert a mspass::Seismogram object to an obspy::Stream with 3 components split apart.

    mspass and obspy have completely incompatible approaches to handling three
    component data.  obspy uses a Stream object that is a wrapper around and
    a list of Trace objects.  mspass stores 3C data bundled into a matrix
    container.   This function takes the matrix container apart and produces
    the three Trace objects obspy wants to define 3C data.   The caller is
    responsible for how they handle bundling the output.

    A very dark side of this function is that any error log entries in the
    parent mspass Seismogram object will be lost in this conversion as obspy
    does not implement that concept.  If you need to save the error log
    you will need to save the input of this function to MongoDB to preserve
    the errorlog it may contain.

    :param sg: is the Seismogram object to be converted
    :type sg: :class:`~mspasspy.ccore.Seismogram`
    :param chanmap:  3 element list of channel names to be assigned components
    :type chanmap: list
    :param hang:  3 element list of horizontal angle attributes (azimuth in degrees)
      to be set in Stats array of output for each component.  (default is
      for cardinal directions)
    :type hang: list
    :param vang:  3 element list of vertical angle (theta of spherical coordinates)
      to be set in Stats array of output for each component.  (default is
      for cardinal directions)
    :type vang: list
    :return: obspy Stream object containing a list of 3 Trace objects in
       mspass component order. Presently the data are ALWAYS returned to
       cardinal directions (see above). It will be empty if sg was marked dead
    :rtype: :class:`obspy.core.stream.Stream`
    """
    dresult = obspy.core.Stream()
    dresult.dead_mspass = True
    # Note this logic will silently return an empty Stream object if the
    # data are marked dead
    if sg.live:
        dresult.dead_mspass = False
        uuids = sg.id()
        logstuff = sg.elog
        for i in range(3):
            ts = ExtractComponent(sg, i)
            ts.put_string(Keywords.chan, chanmap[i])
            ts.put_double(Keywords.channel_hang, hang[i])
            ts.put_double(Keywords.channel_vang, vang[i])
            # ts is a CoreTimeSeries but we need to add a few things to
            # make it mesh with TimeSeries2Trace
            tsex = TimeSeries(ts, uuids)
            tsex.elog = logstuff
            dobspy = TimeSeries2Trace(tsex)
            dresult.append(dobspy)
    else:
        for i in range(3):
            tc = obspy.core.Trace()
            tc.dead_mspass = True
            dresult.append(tc)
    return dresult
Example #21
def arrival_snr_QC(
    data_object,
    noise_window=TimeWindow(-130.0, -5.0),
    noise_spectrum_engine=None,
    signal_window=TimeWindow(-5.0, 120.0),
    signal_spectrum_engine=None,
    band_cutoff_snr=2.0,
    # check these are reasonable - don't remember the formula when writing this
    tbp=5.0,
    ntapers=10,
    high_frequency_search_start=5.0,
    poles=3,
    perc=95.0,
    phase_name="P",
    metadata_key="Parrival",
    optional_metrics=[
        "snr_stats",
        "filtered_envelope",
        "filtered_L2",
        "filtered_Linf",
        "filtered_MAD",
        "filtered_perc",
    ],
    save_spectra=False,
    db=None,
    collection="arrival",
    use_measured_arrival_time=False,
    measured_arrival_time_key="Ptime",
    taup_model=None,
    update_mode=False,
    component=2,
    source_collection="source",
    receiver_collection=None,
):
    """
    Compute a series of metrics that can be used for quality control
    filtering of seismic phase data.

    This is the highest level function in this module for computing
    signal-to-noise ratio metrics for processing signals that can be
    defined by a computable or measurable "phase".  Features this
    function adds over lower level functions in this module are:
        1.  An option to save computed metrics to a MongoDB collection
            (default is "arrival").  If the update_mode argument is
            set True (default is False) the function expects the data_object
            to contain the attribute "arrival_id" that references the
            ObjectID of an existing entry in the collection where the
            data this function computes is to be saved (default is "arrival").
        2.  An option to use a computed or measured arrival as the
            time reference for all windowing.   The lower level snr
            functions in this module require the user to do what this
            function does prior to calling them.  Note one or the other is
            required (i.e. either the computed or measured time will define
            t0 of the processing).

    The input of arg 0 (data_object) can be either a TimeSeries or
    a Seismogram object.  If a Seismogram object is passed the "component"
    argument is used to extract the specified single channel from the Seismogram
    object and that component is used for processing.  That is necessary
    because all the algorithms used are single channel algorithms.  To
    use this function on all components use a loop over components BUT
    make sure you use a unique value for the argument "metadata_key" for
    each component.  Note this will also produce multiple documents per
    input datum.

    The type of the data_object also has a more subtle implication the
    user must be aware of.  That is, in the MsPASS schema we store receiver coordinates
    in one of two different collections:  "channel" for TimeSeries data and
    "site" for Seismogram data.  When such data are loaded the generic keys
    like lat are always converted to names like channel_lat or site_lat
    for TimeSeries and Seismogram data respectively.   This function uses
    the data type to set that naming.  i.e. if the input is TimeSeries
    it tries to fetch the latitude data as channel_lat while if the input
    is a Seismogram it tries to fetch site_lat.   That is true of all coordinate
    data loaded by normalization from a source and receiver collection.

    The following args are passed directly to the function arrival_snr:
    noise_window, signal_window, band_cutoff_snr, tbp, ntapers, poles,
    perc, phase_name, metadata_key, and optional_metrics.  See the docstring
    for arrival_snr and FD_snr_estimator for descriptions of how these
    arguments should be used.  This top level function adds arguments
    described below.

    :param db:  mspass Database object that is used as a handle for MongoDB.
    Default is None, which the function takes to mean you don't want to
    save the computed values to MongoDB.   In this mode the computed
    metrics will all be posted to a python dict that can be found under the
    key defined by the "metadata_key" argument.   When db is defined the
    contents of that same python dict will be saved to MongoDB in the
    collection defined by the "collection" argument.  If db is left as
    the default None the user is responsible for saving and managing the
    computed snr data.   Be aware a simple later call to db.save_data
    will not produce the same normalized data with the (default) arrival
    collection.

    :param collection:  MongoDB collection name where the results of this
    function will be saved.  If the "update_mode" argument is also set
    True the update section will reference this collection. Default is "arrival".

    :param use_measured_arrival_time:  boolean defining the method used to
    define the time reference for windowing used for snr calculations.
    When True the function will attempt to fetch a phase arrival time with
    the key defined by the "measured_arrival_time_key" argument.  In that
    mode if the fetch fails the data_object will be killed and an error
    posted to elog.   That somewhat brutal choice was intentional as the
    expectation is if you want to use measured arrival times you don't
    want data where there are no picks.   The default is False, which means
    a valid taup_model is required.  The reason is that the tau-p calculator
    handle is passed to the function when using model-based travel times.
    There is no way to give that a meaningful default so it defaults to None.

    :param measured_arrival_time_key: is the key used to fetch a
    measured arrival time.   This parameter is ignored if use_measured_arrival_time
    is False.

    :param taup_model: when use_measured_arrival_time is False this argument
    is required.  It defaults to None because there is no way the author
    knows to initialize it to anything valid.  If set it MUST be an instance
    of the obspy class TauPyModel (https://docs.obspy.org/packages/autogen/obspy.taup.tau.TauPyModel.html#obspy.taup.tau.TauPyModel)
    Mistakes in use of this argument can cause a MsPASSError exception to
    be thrown (not logged, but thrown as a fatal error) in one of two ways:
    (1)  If use_measured_arrival_time is False this argument must be defined,
    and (2) if it is defined it MUST be an instance of TauPyModel.

    :param update_mode:   When True the function will attempt to extract
    a MongoDB ObjectID from data_object's Metadata using the (currently fixed)
    key "arrival_id".   If found it will add the computed data to an existing
    document in the collection defined by the collection argument.  Otherwise
    it will simply add a new entry and post the ObjectID of the new document
    with the (same fixed) key arrival_id.  When False no attempt to fetch
    the arrival id is made and we simply add a record.  This parameter is
    completely ignored unless the db argument defines a valid Database class.

    :param component: integer (0, 1, or 2) defining which component of a
    Seismogram object to use to compute the requested snr metrics.   This
    parameter is ignored if the input is a TimeSeries.

    :param source_collection:  normalization collection for source data.
    The default is the MsPASS name "source" which means the function will
    try to load the source hypocenter coordinates (when required) as
    source_lat, source_lon, source_depth, and source_time.

    :param receiver_collection:  when set this name will override the
    automatic setting of the expected normalization collection naming
    for receiver functions (see above).  The default is None which causes
    the automatic switching to be invoked.  If it is any other string
    the automatic naming will be overridden.

    :return:  the data_object modified by insertion of the snr QC data
    in the object's Metadata
    """
    if data_object.dead():
        return data_object
    if isinstance(data_object, TimeSeries):
        # We need to make a copy of a TimeSeries object to assure the only
        # thing we change is the Metadata we add to the return
        data_to_process = TimeSeries(data_object)
        if receiver_collection:
            rcol = receiver_collection
        else:
            rcol = "channel"
    elif isinstance(data_object, Seismogram):
        if component < 0 or component > 2:
            raise MsPASSError(
                "arrival_snr_QC:  usage error.  " +
                "component parameter passed with illegal value={n}\n".format(
                    n=component) + "Must be 0, 1, or 2",
                ErrorSeverity.Fatal,
            )
        data_to_process = ExtractComponent(data_object, component)
        if receiver_collection:
            rcol = receiver_collection
        else:
            rcol = "site"
    else:
        raise MsPASSError(
            "arrival_snr_QC:   received invalid input data\n" +
            "Input must be either TimeSeries or a Seismogram object",
            ErrorSeverity.Fatal,
        )
    if use_measured_arrival_time:
        arrival_time = data_object[measured_arrival_time_key]
    else:
        # This test is essential or python will throw a more obscure,
        # generic exception
        if taup_model is None:
            raise MsPASSError(
                "arrival_snr_QC:  usage error.  " +
                "taup_model parameter is set None but use_measured_arrival_time is False\n"
                +
                "This gives no way to define processing windows.  See docstring",
                ErrorSeverity.Fatal,
            )
        source_lat = data_object[source_collection + "_lat"]
        source_lon = data_object[source_collection + "_lon"]
        source_depth = data_object[source_collection + "_depth"]
        source_time = data_object[source_collection + "_time"]
        receiver_lat = data_object[rcol + "_lat"]
        receiver_lon = data_object[rcol + "_lon"]
        delta = locations2degrees(source_lat, source_lon, receiver_lat,
                                  receiver_lon)
        arrival = taup_model.get_travel_times(
            source_depth_in_km=source_depth,
            distance_in_degree=delta,
            phase_list=[phase_name],
        )
        arrival_time = source_time + arrival[0].time
        taup_arrival_phase = arrival[0].phase.name
        # not sure if this will happen but worth trapping it as a warning if
        # it does
        if phase_name != taup_arrival_phase:
            data_object.elog.log_error(
                "arrival_snr_QC",
                "Requested phase name=" + phase_name +
                " does not match phase name tag returned by obspy taup calculator="
                + taup_arrival_phase,
                "Complaint",
            )
    if data_to_process.time_is_UTC():
        data_to_process.ator(arrival_time)
    [snrdata, elog] = FD_snr_estimator(
        data_to_process,
        noise_window,
        noise_spectrum_engine,
        signal_window,
        signal_spectrum_engine,
        band_cutoff_snr,
        tbp,
        ntapers,
        high_frequency_search_start,
        poles,
        perc,
        optional_metrics,
        save_spectra=save_spectra,
    )
    if elog.size() > 0:
        data_object.elog += elog
    snrdata["phase"] = phase_name
    snrdata["snr_arrival_time"] = arrival_time
    snrdata["snr_signal_window_start"] = arrival_time + signal_window.start
    snrdata["snr_signal_window_end"] = arrival_time + signal_window.end
    snrdata["snr_noise_window_start"] = arrival_time + noise_window.start
    snrdata["snr_noise_window_end"] = arrival_time + noise_window.end

    # These cross-referencing keys may not always be defined when a phase
    # time is based on a pick so we add these cautiously
    scol_id_key = source_collection + "_id"
    rcol_id_key = rcol + "_id"
    if data_object.is_defined(scol_id_key):
        snrdata[scol_id_key] = data_object[scol_id_key]
    if data_object.is_defined(rcol_id_key):
        snrdata[rcol_id_key] = data_object[rcol_id_key]
    # Note we add this result to data_object NOT data_to_process because that
    # is not always the same thing - for a TimeSeries input it is a copy of
    # the original but it may have been altered while for a Seismogram it is
    # an extracted component
    data_object[metadata_key] = snrdata
    if db:
        arrival_id_key = collection + "_id"
        dbcol = db[collection]
        if update_mode:
            if data_object.is_defined(arrival_id_key):
                arrival_id = data_object[arrival_id_key]
                filt = {"_id": arrival_id}
                update_clause = {"$set": snrdata}
                dbcol.update_one(filt, update_clause)
            else:
                data_object.elog.log_error(
                    "arrival_snr_QC",
                    "Running in update mode but arrival id key=" +
                    arrival_id_key + " is not defined\n" +
                    "Inserting computed snr data as a new document in collection="
                    + collection,
                    "Complaint",
                )
                arrival_id = dbcol.insert_one(snrdata).inserted_id
                data_object[arrival_id_key] = arrival_id
        else:
            arrival_id = dbcol.insert_one(snrdata).inserted_id
            data_object[arrival_id_key] = arrival_id
    return data_object
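A hedged call sketch for the function above, assuming a live TimeSeries in UTC time with a measured P pick stored under the key "Ptime"; with db left as None the computed metrics are simply posted under the "Parrival" Metadata key.

# Hypothetical call: compute snr QC metrics relative to a measured P pick.
d = arrival_snr_QC(
    d,                                   # live TimeSeries with a "Ptime" entry
    noise_window=TimeWindow(-130.0, -5.0),
    signal_window=TimeWindow(-5.0, 120.0),
    use_measured_arrival_time=True,
    measured_arrival_time_key="Ptime",
    metadata_key="Parrival",
)
print(d["Parrival"]["phase"], d["Parrival"]["snr_arrival_time"])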
Example #22
def FD_snr_estimator(
    data_object,
    noise_window=TimeWindow(-130.0, -5.0),
    noise_spectrum_engine=None,
    signal_window=TimeWindow(-5.0, 120.0),
    signal_spectrum_engine=None,
    band_cutoff_snr=2.0,
    # check these are reasonable - don't remember the formula when writing this
    tbp=2.5,
    ntapers=4,
    high_frequency_search_start=5.0,
    poles=3,
    perc=95.0,
    optional_metrics=None,
    save_spectra=False,
):
    # optional_metrics=['snr_stats','filtered_envelope','filtered_L2','filtered_Linf','filtered_MAD','filtered_perc']):
    """
    Estimates one or more metrics of signal-to-noise from a TimeSeries object.
    An implicit assumption is that the analysis is centered on a timeable "phase"
    like P, PP, etc.

    This is a python function that can be used to compute one or several
    signal-to-noise ratio estimates based on an estimated bandwidth using
    the C++ function EstimateBandwidth.  The function has a fair number of
    options, but the core metrics computed are the bandwidth estimates
    computed by that function.  It uses a fairly simple search algorithm
    that functions well for most earthquake sources.  For the low end the
    algorithm searches from the first frequency indistinguishable from DC to
    find the lowest frequency for which the snr exceeds a threshold specified
    by the input parameter 'band_cutoff_snr'.   It does a similar search
    from the high end from a point 80% of Nyquist - a good choice for all
    modern digital data that use FIR antialias filters.   Neither search
    accepts the first frequency that satisfies the snr threshold criteria.
    Only when a group of frequencies spanning more than 2 times the
    time-bandwidth product exceeds the threshold is the band edge
    defined.   The actual band edge is then taken as the first frequency
    exceeding the threshold.  That more elaborate algorithm was used to
    prevent pure lines in either the signal or noise spectrum from
    corrupting the estimates.

    A set of optional metrics can be computed.  All optional metrics use
    the bandwidth estimates in one way or another.   Optional metrics are
    defined by the following keywords passed through a list (actually
    any iterable container will work) of strings defining one or more
    of the keywords. The metrics and a brief description of each follow:

    *snr_stats* computes what are commonly plotted in box plots for the
    snr estimates within the estimated bandwidth:  minimum, maximum,
    0.25 (1/4) point, 0.75 (3/4) point, and the median.   These are set
    with following dict keys:   'snr_band_maximum','snr_band_minimum',
    'snr_band_1/4', 'srn_band_3/4', and 'snr_band_median' respectively.

    *filtered_envelope*, *filtered_L2*, *filtered_Linf*, *filtered_perc*, and *filtered_MAD*:
    All of these optional metrics first copy the data_object and then
    filter the copy with a Butterworth bandpass filter with the number of
    poles specified by the poles argument and corners at the band edges
    estimated by the EstimateBandwidth function.   The metrics computed
    are time domain snr estimates computed with the filtered data.  They are
    actually computed from functions in this same module that can be
    used independently and have their own docstring description. The
    functions called have the following names in order of the keyword
    list above:  *snr_envelope*, *snr_L2*, *snr_Linv*, and *snr_MAD*.
    When the computed they are set in the output dictionary with the
    following (again in order) keys:  'snr_envelope','snr_L2', 'srn_Linf',
    and 'snr_MAD'.

    :param data_object:  TimeSeries object to be processed. For Seismogram
    objects the assumption is algorithm would be used for a single
    component (e.g longitudinal or vertical for a P phase)

    :param noise_window: defines the time window to use for computing the
    spectrum considered noise. The time span can be either relative or
    UTC (absolute) time but we do not check for consistency.  This low
    level function assumes they are consistent.  If not, the calculations
    are nearly guaranteed to fail.  Type must be mspasspy.ccore.TimeWindow.

    :param signal_window: defines the time window to use that defines what
    you consider "the signal".  The time span can be either relative or
    UTC (absolute) time but we do not check for consistency.  This low
    level function assumes they are consistent.  If not, the calculations
    are nearly guaranteed to fail.  Type must be mspasspy.ccore.TimeWindow.

    :param noise_spectrum_engine: is expected to either be None
    or an instance of a ccore object called an MTPowerSpectrumEngine.
    When None an instance of MTPowerSpectrumEngine is constructed for
    each call to this function.   That is a convenience for small
    jobs or when called with data from mixed sample rates and/or variable
    length time windows.   It is very inefficient to use the default
    approach for processing large data sets and really for any use in a
    map operation with dask or spark.  Normal use should be for the user to
    predefine an MTPowerSpectrumEngine from the expected window size
    for a given data sample rate and include it in the function call.

    :param signal_spectrum_engine:  is the comparable MTPowerSpectrumEngine
    to use to compute the signal power spectrum.   Default is None with the
    same caveat as above for the noise_spectrum_engine.

    :param tbp:  time-bandwidth product to use for computing the set of
    Slepian functions used for the multitaper estimator.  This parameter is
    used only if the noise_spectrum_engine or signal_spectrum_engine
    arguments are set as None.  The default is 2.5

    :param ntapers:  is the number of Slepian functions (tapers) to compute
    for the multitaper estimators. Like tbp it is referenced only if
    noise_spectrum_engine or signal_spectrum_engine are set to None.
    Note the function will throw an exception if the ntaper parameter is
    not consistent with the time-bandwidth product.  That is, the
    maximum number of tapers is round(2*tbp-1).   Default is 4 which is
    consistent with default tbp=2.5

    :param high_frequency_search_start: Used to specify the upper frequency
      used to start the search for the upper end of the bandwidth by
      the function EstimateBandwidth.  Default is 4.0, which is reasonable
      for teleseismic P wave data.  It should be changed for usage other
      than analysis of teleseismic P phases or the bandwidth may be
      grossly underestimated.

    :param npoles:   defines the number of poles to use for the Butterworth
    bandpass applied for the "filtered" metrics (see above).  Default is 3.

    :param perc:   used only if 'filtered_perc' is in the optional metrics list.
    Specifies the perc parameter as used in seismic unix.  Uses the percentage
    point specified of the sorted abs of all amplitudes.  (Note perc=50.0 is
    identical to MAD)  Default is 95.0 which is 2 sigma for Gaussian noise.

    :param optional_metrics: is an iterable container containing one or more
    of the optional snr metrics discussed above.

    :param store_as_subdocument:  This parameter is included for
    flexibility but should not normally be changed by the user.  As noted
    earlier the outputs of this function are best abstracted as Metadata.
    When this parameter is False the Metadata members are all posted
    directly to data_object's Metadata container.  If set True the
    internally generated python dict is copied and stored with a key
    defined through the subdocument_key argument.  See use below in
    function arrival_snr.

    :param subdocument_key:  key for storing results as a subdocument.
    This parameter is ignored unless store_as_subdocument is True.
    Default is "snr_data"

    :param save_spectra:   If set True (default is False) the function
    will pickle the computed noise and signal spectra and save the
    strings created along with a set of related metadata defining the
    time range to the output python dict (these will be saved in MongoDB
    when db is defined - see below).   This option should ONLY be used
    for spot checking, discovery of why an snr metric has unexpected
    results using graphics, or a research topic where the spectra would
    be of interest.  It is a very bad idea to turn this option on if
    you are processing a large quantity of data and saving the results
    to MongoDB as it will bloat the arrival collection.  Consider a
    different strategy if that is essential for your work.

    :return:  python tuple with two components.  0 is a python dict with
    the computed metrics associated with keys defined above.  1 is a
    mspass.ccore.ErrorLogger object. Any errors in computing any of the
    metrics will be posted to this logger.  Users should then test this
    object using its size() method; if the log is not empty (size > 0)
    the caller should handle that condition.   For normal use that means
    pushing any messages the log contains to the original data object's
    error log.
    """
    algname = "FD_snr_estimator"
    my_logger = ErrorLogger()
    # For this algorithm we dogmatically demand the input be a TimeSeries
    if not isinstance(data_object, TimeSeries):
        raise MsPASSError(
            "FD_snr_estimator:  Received invalid data object - arg0 data must be a TimeSeries",
            ErrorSeverity.Invalid,
        )
    # MTPowerSpectrum at the moment has an issue with how it handles
    # a user error in specifying time-band product and number of tapers.
    # We put in an explicit trap here and abort if the user makes a mistake
    # to avoid a huge spray of error message
    if ntapers > round(2 * tbp):
        message = (
            algname +
            "(Fatal Error):  ntapers={ntapers} inconsistent with tbp={tbp}\n".
            format(ntapers=ntapers, tbp=tbp))
        message += "ntapers must be <= round(2*tbp)"
        raise MsPASSError(message, ErrorSeverity.Fatal)
    if data_object.dead():
        my_logger.log_error(
            algname,
            "Datum received was set dead - cannot compute anything",
            ErrorSeverity.Invalid,
        )
        return [dict(), my_logger]
    # We enclose all the main code here in a try block and catch any MsPASSErrors;
    # they will be posted as log messages. Others will not be handled,
    # intentionally letting python's error mechanism handle them as
    # unexpected exceptions - MsPASSError can be anticipated for data problems
    snrdata = dict()
    try:
        # First extract the required windows and compute the power spectra
        n = WindowData(data_object, noise_window.start, noise_window.end)
        s = WindowData(data_object, signal_window.start, signal_window.end)
        if noise_spectrum_engine:
            nengine = noise_spectrum_engine
        else:
            nengine = MTPowerSpectrumEngine(n.npts, tbp, ntapers)
        if signal_spectrum_engine:
            sengine = signal_spectrum_engine
        else:
            # the engine length must match the signal window, not the noise window
            sengine = MTPowerSpectrumEngine(s.npts, tbp, ntapers)
        N = nengine.apply(n)
        S = sengine.apply(s)
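        # EstimateBandwidth searches the snr spectral ratio (S/N) for the
        # band edges; the returned object carries the low/high edge
        # frequencies and the snr at each edge, which are posted below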
        bwd = EstimateBandwidth(S.df, S, N, band_cutoff_snr, tbp,
                                high_frequency_search_start)
        # These estimates are always computed and posted
        snrdata["low_f_band_edge"] = bwd.low_edge_f
        snrdata["high_f_band_edge"] = bwd.high_edge_f
        snrdata["low_f_band_edge_snr"] = bwd.low_edge_snr
        snrdata["high_f_band_edge_snr"] = bwd.high_edge_snr
        snrdata["spectrum_frequency_range"] = bwd.f_range
        snrdata["bandwidth_fraction"] = bwd.bandwidth_fraction()
        snrdata["bandwidth"] = bwd.bandwidth()
        if save_spectra:
            snrdata["signal_spectrum"] = pickle.dumps(S)
            snrdata["noise_spectrum"] = pickle.dumps(N)
            snrdata["signal_window_start_time"] = signal_window.start
            snrdata["signal_window_end_time"] = signal_window.end
            snrdata["noise_window_start_time"] = noise_window.start
            snrdata["noise_window_end_time"] = noise_window.end

    except MsPASSError as err:
        newmessage = _reformat_mspass_error(
            err,
            "Spectrum calculation and EstimateBandwidth function section failed with the following message\n",
            "No SNR metrics can be computed for this datum",
        )
        my_logger.log_error(algname, newmessage, ErrorSeverity.Invalid)
        return [snrdata, my_logger]

    # For current implementation all the optional metrics require
    # computed a filtered version of the data.  If a new option is
    # desired that does not require filtering the data the logic
    # here will need to be changed to create a more exclusive test

    if len(optional_metrics) > 0:
        # use the mspass butterworth filter for speed - obspy
        # version requires a conversion to Trace objects
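        # constructor arguments (order assumed from the call below): zerophase
        # flag, enable-low-corner flag, enable-high-corner flag, poles and 3db
        # frequency for the low corner, poles and 3db frequency for the high
        # corner, and the data sample interval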
        BWfilt = Butterworth(
            False,
            True,
            True,
            poles,
            bwd.low_edge_f,
            poles,
            bwd.high_edge_f,
            data_object.dt,
        )
        filtered_data = TimeSeries(data_object)
        BWfilt.apply(filtered_data)
        nfilt = WindowData(filtered_data, noise_window.start, noise_window.end)
        sfilt = WindowData(filtered_data, signal_window.start,
                           signal_window.end)
        # In this implementation we don't need this any longer so we
        # delete it here.  If options are added beware
        del filtered_data
        # Some minor efficiency would be possible if we avoided
        # duplication of computations when multiple optional metrics
        # are requested, but the fragility that adds to maintenance
        # is not justified
        for metric in optional_metrics:
            if metric == "snr_stats":
                try:
                    stats = BandwidthStatistics(S, N, bwd)
                    # stats is a Metadata container - copy to snrdata
                    for k in stats.keys():
                        snrdata[k] = stats[k]
                except MsPASSError as err:
                    newmessage = _reformat_mspass_error(
                        err,
                        "BandwithStatistics throw the following error\n",
                        "Five snr_stats attributes were not computed",
                    )
                    my_logger.log_error(algname, newmessage, err.severity)
            if metric == "filtered_envelope":
                try:
                    analytic_nfilt = hilbert(nfilt.data)
                    analytic_sfilt = hilbert(sfilt.data)
                    nampvector = np.abs(analytic_nfilt)
                    sampvector = np.abs(analytic_sfilt)
                    namp = np.median(nampvector)
                    samp = np.max(sampvector)
                    snrdata[
                        "snr_envelope_Linf_over_L1"] = _safe_snr_calculation(
                            samp, namp)
                except:
                    my_logger.log_error(
                        algname,
                        "Error computing filtered_envelope metrics:  snr_envelope_Linf_over_L1 not computed",
                        ErrorSeverity.Complaint,
                    )
            if metric == "filtered_L2":
                try:
                    namp = RMSAmplitude(nfilt)
                    samp = RMSAmplitude(sfilt)
                    snrvalue = _safe_snr_calculation(samp, namp)
                    snrdata["snr_L2"] = snrvalue
                except MsPASSError as err:
                    newmessage = _reformat_mspass_error(
                        err,
                        "Error computing filtered_L2 metric",
                        "snr_L2 attribute was not compouted",
                    )
                    my_logger.log_error(algname, newmessage, err.severity)

            if metric == "filtered_MAD":
                try:
                    namp = MADAmplitude(nfilt)
                    samp = MADAmplitude(sfilt)
                    snrvalue = _safe_snr_calculation(samp, namp)
                    snrdata["snr_MAD"] = snrvalue
                except MsPASSError as err:
                    newmessage = _reformat_mspass_error(
                        err,
                        "Error computing filtered_MAD metric",
                        "snr_MAD attribute was not computed",
                    )
                    my_logger.log_error(algname, newmessage, err.severity)

            if metric == "filtered_Linf":
                try:
                    # the C function expects a fraction - for users a percentage
                    # is clearer
                    namp = PercAmplitude(nfilt, perc / 100.0)
                    samp = PeakAmplitude(sfilt)
                    snrvalue = _safe_snr_calculation(samp, namp)
                    snrdata["snr_Linf"] = snrvalue
                    snrdata["snr_perc"] = perc
                except MsPASSError as err:
                    newmessage = _reformat_mspass_error(
                        err,
                        "Error computing filtered_Linf metric",
                        "snr_Linf attribute was not computed",
                    )
                    my_logger.log_error(algname, newmessage, err.severity)

            if metric == "filtered_perc":
                try:
                    namp = MADAmplitude(nfilt)
                    samp = PercAmplitude(sfilt, perc / 100.0)
                    snrvalue = _safe_snr_calculation(samp, namp)
                    snrdata["snr_perc"] = snrvalue
                    snrdata[
                        "snr_perc"] = perc  # redundant if filter_Linf is also run but tiny cost
                except MsPASSError as err:
                    newmessage = _reformat_mspass_error(
                        err,
                        "Error computing filtered_perc metric",
                        "snr_perf metric was not computed",
                    )
                    my_logger.log_error(algname, newmessage, err.severity)
            # warn about any unrecognized keyword; note an else attached to
            # the last if above would incorrectly flag valid keywords
            if metric not in (
                "snr_stats",
                "filtered_envelope",
                "filtered_L2",
                "filtered_MAD",
                "filtered_Linf",
                "filtered_perc",
            ):
                message = "Illegal optional_metrics keyword=" + metric + "\n"
                message += (
                    "If that is a typo expect some metrics will be missing from output"
                )
                my_logger.log_error(algname, message, ErrorSeverity.Complaint)
    return [snrdata, my_logger]
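
# A minimal usage sketch (not part of the original source) illustrating how
# FD_snr_estimator is typically called.  The window limits, sample interval,
# and engine sizes below are illustrative assumptions; the keyword-argument
# names follow the parameter names documented in the docstring above.
def example_fd_snr_usage(d):
    # d is assumed to be a live TimeSeries in relative time with the
    # phase arrival at time 0 and a sample interval of 0.05 s
    noise_win = TimeWindow(-130.0, -5.0)
    signal_win = TimeWindow(-5.0, 120.0)
    # predefine the multitaper engines once (window length in samples,
    # time-bandwidth product, number of tapers) so repeated calls do not
    # rebuild the Slepian tapers for every datum; 125 s / 0.05 s + 1 = 2501
    nengine = MTPowerSpectrumEngine(2501, 2.5, 4)
    sengine = MTPowerSpectrumEngine(2501, 2.5, 4)
    snrdata, elog = FD_snr_estimator(
        d,
        noise_window=noise_win,
        signal_window=signal_win,
        noise_spectrum_engine=nengine,
        signal_spectrum_engine=sengine,
        optional_metrics=["snr_stats", "filtered_L2", "filtered_MAD"],
    )
    if elog.size() > 0:
        # in a production workflow these messages would normally be pushed
        # to d.elog; here we simply print them for inspection
        print(elog.get_error_log())
    # post the computed metrics to the datum's Metadata container
    for key in snrdata:
        d[key] = snrdata[key]
    return d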