Example #1
import numpy as np
from obspy import UTCDateTime
from libcomcat.search import get_event_by_id
from libcomcat.dataframes import get_phase_dataframe


def sort_shift(df, st, dayst, sr):
    st.sort()
    dayst.sort()
    # if st has extra traces, drop any with no counterpart in dayst
    if len(st) > len(dayst):
        # iterate over a copy so removals do not skip traces
        for tr in list(st):
            if len(
                    dayst.select(station=tr.stats.station,
                                 channel=tr.stats.channel)) == 0:
                st.remove(tr)
    # number of samples between each manual pick and the event origin time
    origintime = UTCDateTime(df['Date'] + 'T' + df['Time'])
    regional = df['Regional']
    eventid = regional + str(df['ID'])
    detail = get_event_by_id(eventid, includesuperseded=True)
    phases = get_phase_dataframe(detail, catalog=regional)
    phases = phases[phases['Status'] == 'manual']
    shifts = np.zeros(len(st), dtype=int)
    for ii in range(len(phases)):
        net, sta, comp = phases.iloc[ii]['Channel'].split('.')[:3]
        arr = UTCDateTime(phases.iloc[ii]['Arrival Time'])
        shift = int(np.round((arr - origintime) * sr))
        for jj in range(len(st)):
            if sta == st[jj].stats.station and comp == st[jj].stats.channel:
                print(sta + " " + comp + " " + str(shift))
                shifts[jj] = shift
    return shifts, st, dayst, phases
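A minimal sketch of how sort_shift might be driven from one row of a pandas catalog; the column names (Date, Time, Regional, ID) follow the df[...] lookups above, while the file paths, event ID, and sampling rate are illustrative assumptions.

import pandas as pd
from obspy import read

# hypothetical catalog row with the fields sort_shift expects
row = pd.Series({'Date': '2018-05-05', 'Time': '17:44:18',
                 'Regional': 'uw', 'ID': 61353821})
st = read('template_waveforms.ms')  # template traces (illustrative path)
dayst = read('day_waveforms.ms')    # day-long continuous traces
shifts, st, dayst, phases = sort_shift(row, st, dayst, sr=40)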
Example #2
def test_magnitude_dataframe():
    cassettes, datadir = get_datadir()
    tape_file = os.path.join(cassettes, 'dataframes_magnitude.yaml')
    with vcr.use_cassette(tape_file):
        detail = get_event_by_id('us1000778i')  # 2016 NZ event
        df = get_phase_dataframe(detail, catalog='us')
        assert len(df) == 174
Example #3
def test_magnitude_dataframe():
    datadir = get_datadir()
    tape_file = os.path.join(datadir, 'vcr_magnitude_dataframe.yaml')
    with vcr.use_cassette(tape_file):
        detail = get_event_by_id('us1000778i')  # 2016 NZ event
        df = get_phase_dataframe(detail, catalog='us')
        assert len(df) == 174
Example #4
def test_phase_dataframe():
    cassettes, datadir = get_datadir()
    tape_file = os.path.join(cassettes, "dataframes_phase.yaml")
    with vcr.use_cassette(tape_file, record_mode="new_episodes"):
        detail = get_event_by_id("us1000778i")  # 2016 NZ event
        df = get_phase_dataframe(detail, catalog="us")
        assert len(df) == 174
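Examples #2–#4 are revisions of the same libcomcat test; each wraps the network call in a vcrpy cassette so the 174-row assertion replays recorded HTTP traffic instead of hitting the live USGS service. A minimal sketch of that record/replay pattern, with an illustrative cassette path and endpoint:

import vcr
import requests

# the first run records traffic into the YAML cassette; subsequent runs
# replay it, so the test is deterministic and works offline
with vcr.use_cassette('fixtures/usgs_version.yaml'):
    resp = requests.get('https://earthquake.usgs.gov/fdsnws/event/1/version')
    assert resp.status_code == 200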
Example #5
import io

from obspy import UTCDateTime, read_events
from obspy.clients.fdsn import Client
from obspy.core.event import Arrival, Pick, WaveformStreamID
from libcomcat.search import get_event_by_id
from libcomcat.dataframes import get_phase_dataframe


def retrieve_usgs_catalog(**kwargs):
    """
    Wrapper on obspy.clients.fdsn.Client and libcomcat (USGS) to retrieve a
    full catalog, including phase picks (which are otherwise not served by
    the USGS fdsn implementation)

    :param kwargs: Will be passed to the Client (e.g. minlongitude,
        maxmagnitude etc...)
    :return: obspy.core.event.Catalog
    """
    cli = Client('https://earthquake.usgs.gov')
    cat = cli.get_events(**kwargs)
    # Now loop over each event and grab the phase dataframe using libcomcat
    for ev in cat:
        print(ev.resource_id.id)
        eid = ev.resource_id.id.split('=')[-2].split('&')[0]
        detail = get_event_by_id(eid, includesuperseded=True)
        phase_df = get_phase_dataframe(detail)
        o = ev.preferred_origin()
        for i, phase_info in phase_df.iterrows():
            seed_id = phase_info['Channel'].split('.')
            loc = seed_id[-1]
            if loc == '--':
                loc = ''
            wf_id = WaveformStreamID(network_code=seed_id[0],
                                     station_code=seed_id[1],
                                     location_code=loc,
                                     channel_code=seed_id[2])
            # Status is 'manual' or 'automatic', which maps onto the Pick
            # evaluation_mode field (Pick has no 'method' keyword)
            pk = Pick(time=UTCDateTime(phase_info['Arrival Time']),
                      evaluation_mode=phase_info['Status'],
                      waveform_id=wf_id,
                      phase_hint=phase_info['Phase'])
            ev.picks.append(pk)
            arr = Arrival(pick_id=pk.resource_id.id,
                          phase=pk.phase_hint,
                          azimuth=phase_info['Azimuth'],
                          distance=phase_info['Distance'],
                          time_residual=phase_info['Residual'],
                          time_weight=phase_info['Weight'])
            o.arrivals.append(arr)
        # Try to read focal mechanisms/moment tensors
        if 'moment-tensor' in detail.products:
            # Always take MT where available
            mt_xml = detail.getProducts('moment-tensor')[0].getContentBytes(
                'quakeml.xml')[0]
        elif 'focal-mechanism' in detail.products:
            mt_xml = detail.getProducts('focal-mechanism')[0].getContentBytes(
                'quakeml.xml')[0]
        else:
            continue
        mt_ev = read_events(
            io.TextIOWrapper(io.BytesIO(mt_xml), encoding='utf-8'))
        FM = mt_ev[0].focal_mechanisms[0]
        FM.triggering_origin_id = ev.preferred_origin().resource_id.id
        ev.focal_mechanisms = [FM]
    return cat
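A sketch of how retrieve_usgs_catalog might be called; the keyword arguments are standard FDSN event-search parameters passed straight through to Client.get_events, and the window values here are illustrative.

from obspy import UTCDateTime

cat = retrieve_usgs_catalog(starttime=UTCDateTime('2018-01-01'),
                            endtime=UTCDateTime('2018-02-01'),
                            minlatitude=46.0, maxlatitude=49.0,
                            minlongitude=-124.0, maxlongitude=-120.0,
                            minmagnitude=3.0)
print(len(cat), 'events retrieved with picks attached')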
Example #6
import numpy as np
from obspy import Stream, UTCDateTime
from obspy.clients.fdsn import Client
from libcomcat.search import get_event_by_id
from libcomcat.dataframes import get_phase_dataframe


def make_template(df, sr):
    client = Client("IRIS")
    # make templates
    regional = df['Regional']
    eventid = regional + str(df['ID'])
    detail = get_event_by_id(eventid, includesuperseded=True)
    phases = get_phase_dataframe(detail, catalog=regional)
    phases = phases[phases['Status'] == 'manual']
    # keep a single pick per channel/phase pair
    phases = phases[~phases.
                    duplicated(keep='first', subset=['Channel', 'Phase'])]
    print(phases)
    st = Stream()
    for ii in range(len(phases)):
        net, sta, comp = phases.iloc[ii]['Channel'].split('.')[:3]
        arr = UTCDateTime(phases.iloc[ii]['Arrival Time'])
        # snap the arrival time to the nearest sample at sampling rate sr
        if int(np.round(arr.microsecond / (1 / sr * 10**6)) * 1 / sr *
               10**6) == 1000000:
            arr.microsecond = 0
            arr = arr + 1  # roll over a whole second; second=60 is invalid
        else:
            arr.microsecond = int(
                np.round(arr.microsecond / (1 / sr * 10**6)) * 1 / sr * 10**6)
        t1 = arr - 1
        t2 = arr + 9
        try:
            tr = client.get_waveforms(net, sta, "*", comp, t1 - 2, t2 + 2)
        except Exception:
            print("No data for " + net + " " + sta + " " + comp + " " +
                  str(t1) + " " + str(t2))
        else:
            print("Data available for " + net + " " + sta + " " + comp + " " +
                  str(t1) + " " + str(t2))
            tr.detrend()
            tr.trim(starttime=t1 - 2,
                    endtime=t2 + 2,
                    nearest_sample=1,
                    pad=1,
                    fill_value=0)
            tr.filter("bandpass", freqmin=2, freqmax=7)
            tr.interpolate(sampling_rate=sr, starttime=t1)
            tr.trim(starttime=t1,
                    endtime=t2,
                    nearest_sample=1,
                    pad=1,
                    fill_value=0)
            st += tr
    return st
Example #7
def check_phase_info(df):
    """Return 1 if both the event and its phase data can be retrieved,
    else 0."""
    regional = df['Regional']
    eventid = regional + str(df['ID'])
    try:
        detail = get_event_by_id(eventid, includesuperseded=True)
        # covers both a failed event lookup and unparseable phase data
        get_phase_dataframe(detail, catalog=regional)
    except Exception:
        return 0
    return 1
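Because check_phase_info takes a single catalog row, it drops neatly into a pandas filter that screens events before any heavier download work; the two-event catalog and its IDs below are hypothetical.

import pandas as pd

catalog = pd.DataFrame({'Regional': ['uw', 'nc'],
                        'ID': [61353821, 73291055]})  # hypothetical IDs
# keep only rows whose event and phase data are retrievable
catalog = catalog[catalog.apply(check_phase_info, axis=1) == 1]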
Example #8
def test_nan_mags():
    detail = get_event_by_id('us2000arrw')
    try:
        _ = get_phase_dataframe(detail)
    except ParsingError:
        # a ParsingError is an acceptable outcome for this event; the test
        # only guards against crashing with an unexpected exception type
        pass
Example #9
def make_template(df, sampling_rate, filter=[0.2, 8], tcs_length=[1, 9]):
    '''
        Download template waveforms by event ID.
        Original function by: Amanda Thomas
        Modified by: Tim Lin
         -merge data with interpolated data points 2020.09
         -add multiple attempts when a download hits an unexpected issue 2020.09

    '''
    import time

    import numpy as np
    from obspy import Stream, UTCDateTime
    from obspy.clients.fdsn import Client
    from libcomcat.search import get_event_by_id
    from libcomcat.dataframes import get_phase_dataframe
    client = Client("IRIS")
    # make templates
    regional = df['Regional']
    eventid = regional + str(df['ID'])
    detail = get_event_by_id(eventid, includesuperseded=True)
    OT = UTCDateTime(detail.time)  #event origin time
    phases = get_phase_dataframe(detail, catalog=regional)
    phases = phases[phases['Status'] == 'manual']
    phases = phases[~phases.
                    duplicated(keep='first', subset=['Channel', 'Phase'])]
    st = Stream()
    tr = Stream()
    sav_net_sta_comp = []
    sav_phase = []
    sav_arr = []
    sav_travel = []
    All_info = {}
    for ii in range(len(phases)):
        net, sta, comp, location = phases.iloc[ii]['Channel'].split('.')
        if location == '--':
            location = ''  # sometimes '--' is used instead of an empty string
        # P or S; keep this info for later relocation
        Phase = phases.iloc[ii]['Phase']
        arr = UTCDateTime(phases.iloc[ii]['Arrival Time'])
        # snap the arrival time to the nearest sample at sampling_rate
        if int(
                np.round(arr.microsecond / (1 / sampling_rate * 10**6)) * 1 /
                sampling_rate * 10**6) == 1000000:
            arr.microsecond = 0
            arr = arr + 1  # roll over a whole second; second=60 is invalid
        else:
            arr.microsecond = int(
                np.round(arr.microsecond / (1 / sampling_rate * 10**6)) * 1 /
                sampling_rate * 10**6)
        t1 = arr - tcs_length[0]
        t2 = arr + tcs_length[1]
        i_attempt = 0  # reset the number of attempts
        tr_exist = False
        while i_attempt < 5:
            try:
                # careful: this can return more than one trace per channel,
                # i.e. location codes '00', '01', ...
                tr = client.get_waveforms(net, sta, location, comp,
                                          t1 - 2, t2 + 2)
                tr_exist = True
                break
            except Exception:
                time.sleep(2)  # wait 2 s and try again
                i_attempt += 1

        if not tr_exist:
            # all five attempts at client.get_waveforms failed; skip
            continue
        else:
            tr.merge(method=1,
                     interpolation_samples=-1,
                     fill_value='interpolate')
            tr.detrend('linear')
            tr.trim(starttime=t1 - 2,
                    endtime=t2 + 2,
                    nearest_sample=1,
                    pad=1,
                    fill_value=0)
            if filter:
                tr.filter("bandpass", freqmin=filter[0], freqmax=filter[1])
            tr.interpolate(sampling_rate=sampling_rate,
                           starttime=t1,
                           method='linear')
            tr.trim(starttime=t1,
                    endtime=t2,
                    nearest_sample=1,
                    pad=1,
                    fill_value=0)
            assert len(tr) == 1, \
                'Unexpected error when querying: %s %s %s %s %s %s' % (
                    net, sta, location, comp, t1 - 2, t2 + 2)
            st += tr.copy()
            # save name, time, and phase info for later relocation; the .ms
            # file gives only start times, not whether a pick is P or S
            sav_net_sta_comp.append(net + '.' + sta + '.' + comp + '.' +
                                    location)
            sav_phase.append(Phase)
            tmp_arrT = arr.strftime(
                '%Y-%m-%dT%H:%M:%S.%f')[:-4]  # accurate to 0.01 s
            sav_arr.append(tmp_arrT)
            sav_travel.append(arr - OT)  #travel time (relative to the origin)
    # ========= IMPORTANT NOTE! The order of these arrays is NOT necessarily
    # ========= the same as st. When a net.sta.chn.loc has both P and S picks,
    # ========= the order gets shuffled when written to the .ms file, so treat
    # ========= this info with extra caution.
    # ========= 2020.11.12 update: bulk_make_template runs an additional
    # ========= re-order (All_info_reorder) on All_info, so data generated by
    # ========= download_tools.bulk_make_template is ready to use (matching
    # ========= the .ms file order).
    All_info['net_sta_comp'] = np.array(sav_net_sta_comp)
    All_info['phase'] = np.array(sav_phase)
    All_info['arrival'] = np.array(sav_arr)  #absolute arrival time
    All_info['travel'] = np.array(sav_travel)  #phase travel time (sec)
    All_info['OT_template'] = OT.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-4]
    All_info['tcs_length'] = tcs_length
    All_info['filter'] = filter
    All_info['eqloc'] = [df['Lon'], df['Lat'], df['Depth']]
    All_info['eqmag'] = df['Magnitude']
    All_info['evid'] = eventid
    return st, All_info
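A sketch of how this make_template variant might be driven from one row of an event table; the column names mirror the df[...] lookups in the function, while the concrete values and output filename are illustrative.

import pandas as pd

# hypothetical event row with the columns the function reads
row = pd.Series({'Regional': 'uw', 'ID': 61353821,
                 'Lon': -122.3, 'Lat': 47.6, 'Depth': 50.0,
                 'Magnitude': 3.2})
st, info = make_template(row, sampling_rate=100,
                         filter=[0.2, 8], tcs_length=[1, 9])
st.write('template_%s.ms' % info['evid'], format='MSEED')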
Example #10
def main():
    parser = get_parser()
    args = parser.parse_args()

    setup_logger(args.logfile, args.loglevel)

    if args.eventid:
        detail = get_event_by_id(args.eventid, catalog=args.catalog)
        try:
            df = get_phase_dataframe(detail, args.catalog)
            filename = save_dataframe(
                df, args.directory, detail, args.format, catalog=args.catalog)
            print('Saved phase data for %s to %s' % (detail.id, filename))
            sys.exit(0)
        except Exception as e:
            fmt = ('Could not extract the phase data due to the '
                   'following error: \n"%s"\n\nExiting.')
            print(fmt % (str(e)))
            sys.exit(1)

    if args.bounds and args.radius:
        print('Please specify either a bounding box OR radius search.')
        sys.exit(1)

    if not os.path.isdir(args.directory):
        os.makedirs(args.directory)

    latitude = None
    longitude = None
    radiuskm = None
    lonmin = latmin = lonmax = latmax = None
    starttime = endtime = None
    if args.radius:
        latitude = args.radius[0]
        longitude = args.radius[1]
        radiuskm = args.radius[2]

    if args.bounds:
        lonmin, lonmax, latmin, latmax = args.bounds
        # fix longitude bounds when crossing dateline
        if lonmin > lonmax and lonmax >= -180:
            lonmin -= 360

    minmag = 0.0
    maxmag = 9.9
    if args.magRange:
        minmag = args.magRange[0]
        maxmag = args.magRange[1]

    events = search(starttime=args.startTime,
                    endtime=args.endTime,
                    updatedafter=args.after,
                    minlatitude=latmin,
                    maxlatitude=latmax,
                    minlongitude=lonmin,
                    maxlongitude=lonmax,
                    latitude=latitude,
                    longitude=longitude,
                    maxradiuskm=radiuskm,
                    catalog=args.catalog,
                    contributor=args.contributor,
                    maxmagnitude=maxmag,
                    minmagnitude=minmag)

    if not len(events):
        print('No events found matching your search criteria. Exiting.')
        sys.exit(0)

    for event in events:
        if not event.hasProduct('phase-data'):
            continue
        try:
            detail = event.getDetailEvent()
            try:
                df = get_phase_dataframe(detail, args.catalog)
            except Exception as e:
                fmt = ('Could not get phase dataframe for '
                       'event %s. Error "%s". Continuing.')
                tpl = (detail.id, str(e))
                print(fmt % tpl)
                continue
            filename = save_dataframe(
                df, args.directory, detail, args.format, catalog=args.catalog)

            print('Saved phase data for %s to %s' % (event.id, filename))
        except Exception as e:
            print('Failed to retrieve phase data for event %s.  Error "%s"... continuing.' % (
                event.id, str(e)))
            continue
Example #11
import numpy as np
from obspy import Stream, UTCDateTime
from obspy.clients.fdsn import Client
from libcomcat.search import get_event_by_id
from libcomcat.dataframes import get_phase_dataframe


def make_training_data(df, sr, winsize, phase):
    # make templates
    regional = df['Regional']
    if regional == 'uw':
        client = Client("IRIS")
    elif regional == "nc":
        client = Client("NCEDC")
    else:
        raise ValueError("Unsupported regional network: %s" % regional)
    eventid = regional + str(df['ID'])
    detail = get_event_by_id(eventid, includesuperseded=True)
    phases = get_phase_dataframe(detail, catalog=regional)
    phases = phases[phases['Status'] == 'manual']
    if phase != 'N':
        phases = phases[phases['Phase'] == phase]
    # phases=phases[~phases.duplicated(keep='first',subset=['Channel','Phase'])]
    print(phases)
    st = Stream()
    for ii in range(len(phases)):
        tr = Stream()
        net, sta, comp = phases.iloc[ii]['Channel'].split('.')[:3]
        pors = phases.iloc[ii]['Phase']
        arr = UTCDateTime(phases.iloc[ii]['Arrival Time'])
        t1 = arr - winsize / 2
        t2 = arr + winsize / 2
        if phase == 'N':
            t1 -= 120
            t2 -= 120
        try:  # try to get the data
            tr = client.get_waveforms(net, sta, "*", comp, t1 - 1, t2 + 1)
        except Exception:
            print("No data for " + net + " " + sta + " " + comp + " " +
                  str(t1) + " " + str(t2))
        else:
            print("Data available for " + net + " " + sta + " " + comp + " " +
                  str(t1) + " " + str(t2))
            try:  # try to resample the data
                tr.interpolate(sampling_rate=sr, starttime=t1)
            except Exception:
                print("Data interp issues")
            else:
                tr.trim(starttime=t1,
                        endtime=t2,
                        nearest_sample=1,
                        pad=1,
                        fill_value=0)
        if len(tr) > 0:
            tr[0].stats.location = pors
            st += tr
    for tr in list(st):
        # drop traces shorter than the desired length; iterate over a
        # copy so removals do not skip traces
        if len(tr.data) != sr * winsize + 1:
            st.remove(tr)
    for tr in list(st):
        # drop traces that are all zeros
        if np.all(tr.data == 0):
            st.remove(tr)
    for tr in list(st):
        # drop traces that contain NaNs
        if np.sum(np.isnan(tr.data)) > 0:
            st.remove(tr)
    st.detrend()
    #plot_training_data_streams(st,sr)
    stout = np.zeros((len(st), sr * winsize + 1))
    pors = np.zeros(len(st))
    for ii in range(len(st)):
        stout[ii, :] = st[ii].data
        if st[ii].stats.location == 'P':
            pors[ii] = 0
        if st[ii].stats.location == 'S':
            pors[ii] = 1
    return stout, pors
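A sketch of calling make_training_data to assemble labeled windows for pick classification; the catalog row is hypothetical, and the window length and sampling rate are illustrative.

import pandas as pd

row = pd.Series({'Regional': 'uw', 'ID': 61353821})  # hypothetical event
# 15 s windows at 100 Hz centred on manual P picks
waves, labels = make_training_data(row, sr=100, winsize=15, phase='P')
print(waves.shape)  # (n_traces, sr * winsize + 1)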