Example #1
def multiplex(st, Nc=None, trimTolerance=15, template=False, returnlist=False,
              retst=False):
    """
    Multiplex an obspy stream object
    Parameters
    ----------
    st : instance of obspy stream
        The stream containing the data to multiplex. 
    Nc : None or int
        If not None, the number of channels in st, else try to determine
    trimTolerance : int
        The number of samples each channel can vary before being rejected
    template : bool
        If True st is a template waveform, therefore an exception will be
        raised if trimTolerance is exceeded
    returnlist : bool
        If True also return the stacked (un-multiplexed) data array
    retst : bool
        If True also return the input stream
    
    Returns
    ------
    list with multiplexed data and other desired waveforms
    """
    if Nc is None:
        Nc = len(set([x.stats.station for x in st]))
    if Nc == 1:  # If only one channel do nothing
        C1 = st[0].data
        C = st[0].data

    else:
        chans = [x.data for x in st]  # Data on each channel
        minlen = np.array([len(x) for x in chans])
        if max(minlen) - min(minlen) > trimTolerance:
            netsta = st[0].stats.network + '.' + st[0].stats.station
            utc1 = str(st[0].stats.starttime).split('.')[0]
            utc2 = str(st[0].stats.endtime).split('.')[0]
            msg = ('Channel lengths are not within %d on %s from %s to %s' %
                   (trimTolerance, netsta, utc1, utc2))
            if template:
                detex.log(__name__, msg, level='error')
            else:
                msg = msg + ' trimming to shortest channel'
                detex.log(__name__, msg, level='warning', pri=True)
                trimDim = min(minlen)  # trim to smallest dimension
                chansTrimed = [x[:trimDim] for x in chans]
        elif max(minlen) - min(minlen) > 0:  # if all channels not equal lengths
            trimDim = min(minlen)
            chansTrimed = [x[:trimDim] for x in chans]  # trim to shortest
        elif max(minlen) - min(minlen) == 0:  # if chan lengths are exactly equal
            chansTrimed = chans
        C = np.vstack((chansTrimed))
        C1 = np.ndarray.flatten(C, order='F')
    out = [C1]  # init output list
    if returnlist:
        out.append(C)
    if retst:
        out.append(st)
    if len(out) == 1:
        return out[0]
    else:
        return out
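A minimal usage sketch (hypothetical, not part of the detex source): build a three-channel obspy stream of equal-length synthetic traces and multiplex it. Nc is passed explicitly because the auto-detection in this snippet counts stations rather than channels.

import numpy as np
import obspy

traces = [obspy.Trace(data=np.arange(5, dtype=float),
                      header={'station': 'STA1', 'channel': c})
          for c in ('HHZ', 'HHN', 'HHE')]
st = obspy.Stream(traces)

mp = multiplex(st, Nc=3)  # 1-D array with samples interleaved by channel
mp, chan_block = multiplex(st, Nc=3, returnlist=True)
# chan_block is the stacked 2-D array; mp is its column-wise flattening
assert np.array_equal(mp, chan_block.flatten(order='F'))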
Example #2
def _divideIntoChunks(utc1, utc2, duration, randSamps):
    """
    Function to take two UTC datetime objects and create a generator that
    yields all the times in between in intervals of duration. If randSamps is
    not None, a random subsample of the chunk start times is returned instead,
    which makes loading files easier. The randSamps parameter can be at most
    the total number of chunks. Inputs can be any obspy-readable time format
    """
    utc1 = obspy.UTCDateTime(utc1)
    utc2 = obspy.UTCDateTime(utc2)
    # convert to time stamps (epoch time)
    ts1 = utc1.timestamp - utc1.timestamp % duration
    ts2 = utc2.timestamp - utc2.timestamp % duration
    if randSamps is None:
        t = ts1
        while t <= ts2:
            yield obspy.UTCDateTime(t)  # yield a value
            t += duration  # advance by duration
    else:

        utcList = np.arange(utc1.timestamp, utc2.timestamp, duration)
        if randSamps > len(utcList) / 4:
            msg = ('Population too small for %d random samples, taking %d' % (
                randSamps, len(utcList)))
            detex.log(__name__, msg, level='info')
            randSamps = len(utcList)
        ranutc = random.sample(utcList, randSamps)
        rsamps = [obspy.UTCDateTime(x) for x in ranutc]
        for samp in rsamps:
            yield samp
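A quick check of the generator path (illustrative only, not from the detex source): one day divided into hour-long chunks yields 25 hour-aligned UTCDateTime start times, inclusive of both end points.

import obspy

hours = list(_divideIntoChunks('2015-01-01', '2015-01-02', 3600, None))
assert len(hours) == 25                      # both endpoints are included
assert all(isinstance(t, obspy.UTCDateTime) for t in hours)
assert hours[1] - hours[0] == 3600           # consecutive chunks one hour apart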
Example #3
def _ensureUnique(cx, cxdf):
    """
    Make sure each coefficient is unique so it can be used as a key to
    reference time lags; if not unique, perturb slightly
    """
    se = pd.Series(cx)
    dups = se[se.duplicated()]
    count = 0
    while len(dups) > 0:
        msg = ('Duplicates found in correlation coefficients,'
               'perturbing slightly to get unique values')
        detex.log(__name__, msg, level='warning', pri=True)
        for a in dups.iteritems():
            se[a[0]] = a[1] - abs(.00001 * np.random.rand())
        count += 1
        dups = se[se.duplicated()]
        if count > 10:
            msg = 'cannot make coefficients unique, killing program'
            detex.log(__name__, msg, level='error')
    if count > 1:  # if the cx has been perturbed update cxdf
        for a in range(len(cxdf)):
            sindex = sum(pd.isnull(cxdf.iloc[a]))  # leading NaNs in row a
            tri1 = _triangular(len(cxdf))
            tri2 = _triangular(len(cxdf) - a)
            tri3 = _triangular(len(cxdf) - (a + 1))
            cxdf.values[a, sindex:] = cx[tri1 - tri2: tri1 - tri3]
    return se.values, cxdf
Example #4
def _checkClusterInputs(filt, dtype, trim, decimate):
    """
    Check a few key input parameters to make sure everything is kosher
    """
    if filt is not None and len(filt) != 4:  # check filt
        msg = 'filt must either be None (no filter) or a len 4 list or tuple'
        detex.log(__name__, msg, level='error')

    if dtype != 'double' and dtype != 'single':  # check dtype
        msg = ('dtype must be either "double" or "single" not %s, '
               'setting to double' % dtype)
        dtype = 'double'
        detex.log(__name__, msg, level='warn', pri=True)

    if trim is not None:  # check trim
        if len(trim) != 2:
            msg = 'Trim must be a list or tuple of length 2'
            detex.log(__name__, msg, level='warn', pri=True)
        else:
            if -trim[0] > trim[1]:
                msg = 'Invalid trim parameters'
                detex.log(__name__, msg, level='error')

    if decimate is not None:
        if not isinstance(decimate, int):
            msg = 'decimate must be an int'
            detex.log(__name__, msg, level='error', e=TypeError)
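An illustrative call that passes all of the checks above (assuming, as in the _applyFilter example later in this listing, that filt is ordered freqmin, freqmax, corners, zerophase); none of the log branches fire for these values.

_checkClusterInputs(filt=[1.0, 10.0, 2, True], dtype='double',
                    trim=[10, 120], decimate=None)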
Example #5
def _checkSTALTA(st, filt, STATime, LTATime, limit):
    """
    Take a stream and make sure its vertical component (or the first component
    if there is no vertical) does not exceed limit given STATime and LTATime.
    Return True if it passes, False if it fails
    """
    if limit is None:
        return True
    if len(st) < 1:
        return None
    try:
        stz = st.select(component='Z')[0]
    except IndexError:  # if no Z found on trace
        return None
    if len(stz) < 1:
        stz = st[0]
    sz = stz.copy()
    sr = sz.stats.sampling_rate
    ltaSamps = LTATime * sr
    staSamps = STATime * sr
    cft = classic_sta_lta(sz.data, staSamps, ltaSamps)
    if np.max(cft) <= limit:
        return True
    else:
        sta = sz.stats.station
        t1 = sz.stats.starttime
        t2 = sz.stats.endtime
        msg = ('%s fails sta/lta req of %d between %s and %s' %
               (sta, limit, t1, t2))
        detex.log(__name__, msg, level='warn')
        return False
Example #6
def _subSamp(Ceval, ind):
    """ 
    Method to estimate subsample time delays using cosine-fit interpolation
    Cespedes, I., Huang, Y., Ophir, J. & Spratt, S. 
    Methods for estimation of sub-sample time delays of digitized echo signals. 
    Ultrason. Imaging 17, 142–171 (1995)
    
    Returns
    -------
    The amount the sample should be shifted (float between -.5 and .5)
    """
    # If max occurs at start or end of CC no extrapolation
    if ind == 0 or ind == len(Ceval) - 1:
        tau = 0.0
    else:
        cb4 = Ceval[ind - 1]
        caf = Ceval[ind + 1]
        cn = Ceval[ind]
        alpha = np.arccos((cb4 + caf) / (2 * cn))
        alsi = np.sin(alpha)
        tau = -(np.arctan((cb4 - caf) / (2 * cn * alsi)) / alpha)
        if abs(tau) > .5:
            msg = ('subsample failing, more than .5 sample shift predicted')
            detex.log(__name__, msg, level='warning', pri=True)
            return ind
    return tau
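A numeric sanity check (not from the detex source): for samples of a cosine whose true peak falls between grid points, the cosine-fit interpolation above recovers the fractional offset of the peak from the integer argmax.

import numpy as np

true_peak = 10.3                      # peak lies 0.3 samples past index 10
omega = 0.6
Ceval = np.cos(omega * (np.arange(21) - true_peak))
ind = int(np.argmax(Ceval))           # integer-sample maximum, here 10
tau = _subSamp(Ceval, ind)
assert abs((ind + tau) - true_peak) < 1e-6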
Example #7
def writeKMLFromStationKey(df='StationKey.csv', outname='stations.kml'):
    """
    Write a KML file from a station key

    Parameters
    -------------
    df : str or pandas DataFrame
        If str then the path to the station key. If DataFrame then the
        station key loaded with the readKey function with key_type='station'
    outname : str
        name of the kml file
    """
    if isinstance(df, string_types):
        df = pd.read_csv(df)
    elif not isinstance(df, pd.DataFrame):
        msg = ('Input type not understood, must be path to station key or '
               'dataframe of station key')
        detex.log(__name__, msg, level='error')
    kml = simplekml.Kml(open=1)
    for a in df.iterrows():
        pnt = kml.newpoint()
        pnt.name = str(a[1].STATION)
        pnt.coords = [(a[1].LON, a[1].LAT)]
        # print(a[1].STATION,a[1].LON,a[1].LAT)
    kml.save(outname)
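A usage sketch with made-up station data (not from the detex source, and assuming the module-level imports the excerpt relies on are available): the function only reads the STATION, LAT and LON columns of the station key.

import pandas as pd

stakey = pd.DataFrame({'STATION': ['TA01', 'TA02'],
                       'LAT': [40.01, 40.55],
                       'LON': [-111.65, -111.20]})
writeKMLFromStationKey(stakey, outname='stations.kml')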
Example #8
    def _CreateCoeffArray(self, corSeries, name, threshold, sta, offsets, mags,
                          ewf, MPcon, events, ssTD, WFU, UtU):
        """
        function to create an array of results for each detection, including
        time of detection, estimated magnitude, etc. 
        """
        dpv = 0
        cols = [
            'DS', 'DS_STALTA', 'STMP', 'Name', 'Sta', 'MSTAMPmin', 'MSTAMPmax',
            'Mag', 'SNR', 'ProEnMag'
        ]
        sr = corSeries.SampRate  # sample rate
        start = corSeries.TimeStamp  # start time of data block

        # set array to evaluate for successful triggers
        if self.trigCon == 0:
            Ceval = corSeries.SSdetect.copy()
        elif self.trigCon == 1:
            Ceval = corSeries.STALTA.copy()
        Sar = pd.DataFrame(columns=cols)
        count = 0
        # while there are any values in the det stat. vect that exceed thresh.
        while Ceval.max() >= threshold[name]:
            trigIndex = Ceval.argmax()
            coef = corSeries.SSdetect[trigIndex]
            times = float(trigIndex) / sr + start
            if self.fillZeros:  # if zeros are being filled dont try STA/LTA
                SLValue = 0.0
            else:
                try:
                    SLValue = corSeries.STALTA[trigIndex]
                except TypeError:
                    SLValue = 0.0
            Ceval = self._downPlayArrayAroundMax(Ceval, sr, dpv)
            # estimate mags else return NaNs as mag estimates
            if self.estimateMags:  # estimate magnitudes
                M1, M2, SNR = self._estMag(trigIndex, corSeries, MPcon,
                                           mags[name], events[name], WFU[name],
                                           UtU[name], ewf[name], coef, times,
                                           name, sta)
                peMag, stMag = M1, M2
            else:
                peMag, stMag, SNR = np.NaN, np.NaN, np.NaN

            # kill switch to prevent infinite loop (just in case)
            if count > 4000:
                msg = (('over 4000 events found in single data block on %s '
                        'for %s around %s') % (sta, name, times))
                detex.log(__name__, msg, level='error')

            # get predicted origin time ranges
            minof = np.min(offsets[name])
            maxof = np.max(offsets[name])
            MSTAMPmax, MSTAMPmin = times - minof, times - maxof
            Sar.loc[count] = [
                coef, SLValue, times, name, sta, MSTAMPmin, MSTAMPmax, stMag,
                SNR, peMag
            ]
            count += 1
        return Sar
Example #9
def _deleteDetDups(ssDB, trigCon, trigParameter, associateBuffer, starttime,
                   endtime, stations, tableName, PfKey=None):
    """
    delete detections of the same event, keeping only the detection with the
    highest detection statistic
    """
    sslist = []
    SQLstr = _buildSQL(PfKey, trigCon, trigParameter,
                       starttime, stations, endtime, tableName)
    for sql in SQLstr:
        loadedRes = detex.util.loadSQLite(ssDB, tableName, sql=sql)
        if isinstance(loadedRes, pd.DataFrame):
            sslist.append(loadedRes)
    if len(sslist) < 1:  # if no events found
        return None
    try:
        ssdf = pd.concat(sslist, ignore_index=True)
    except ValueError:
        msg = 'Cant create detResults instance, no detections meet all reqs'
        detex.log(__name__, msg, level='error')
    ssdf.reset_index(drop=True, inplace=True)
    ssdf.sort_values(by=['Sta', 'MSTAMPmin'], inplace=True)
    con1 = ((ssdf.MSTAMPmin - associateBuffer) > ssdf.MSTAMPmax.shift())
    con2 = ssdf.Sta != ssdf.Sta.shift()
    ssdf['Gnum'] = (con1 | con2).cumsum()
    ssdf.sort_values(by=['Gnum', 'DS'], inplace=True)
    ssdf.drop_duplicates(subset='Gnum', keep='last', inplace=True)
    ssdf.reset_index(inplace=True, drop=True)

    return ssdf
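A toy illustration (not from the detex source) of the grouping trick used above: a new group number starts whenever a detection window does not overlap the previous one (beyond the associate buffer) or the station changes, and only the highest-DS row of each group survives.

import pandas as pd

df = pd.DataFrame({
    'Sta':       ['AAA', 'AAA', 'AAA', 'BBB'],
    'MSTAMPmin': [100.0, 101.0, 500.0, 100.5],
    'MSTAMPmax': [110.0, 111.0, 510.0, 110.5],
    'DS':        [0.6,   0.9,   0.7,   0.8],
})
buf = 2.0
df.sort_values(by=['Sta', 'MSTAMPmin'], inplace=True)
con1 = (df.MSTAMPmin - buf) > df.MSTAMPmax.shift()
con2 = df.Sta != df.Sta.shift()
df['Gnum'] = (con1 | con2).cumsum()
best = (df.sort_values(by=['Gnum', 'DS'])
          .drop_duplicates(subset='Gnum', keep='last'))
print(best[['Sta', 'MSTAMPmin', 'DS', 'Gnum']])
# keeps the DS=0.9 row of the overlapping pair, plus the two isolated rows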
Example #10
def _tryDownloadData(net, sta, chan, loc, utcStart, utcEnd, client):
    # get data, return False if fail
    try:
        st = client.get_waveforms(net, sta, loc, chan, utcStart, utcEnd,
                                  attach_response=True)
        return st
    except:
        detex.log(__name__, 'Download failed for %s.%s %s from %s to %s' %
                  (net, sta, chan, str(utcStart), str(utcEnd)))
        return False
Example #11
def _subSamp(Ceval, ind):
    """ 
    Method to estimate subsample time delays using cosine-fit interpolation
    Cespedes, I., Huang, Y., Ophir, J. & Spratt, S. 
    Methods for estimation of sub-sample time delays of digitized echo signals. 
    Ultrason. Imaging 17, 142–171 (1995)
    
    Returns
    -------
    The amount the sample should be shifted (float between -.5 and .5)
    """
    # If max occurs at start or end of CC no extrapolation
    if ind == 0 or ind == len(Ceval) - 1:
        tau = 0.0
    else:
        cb4 = Ceval[ind - 1]
        caf = Ceval[ind + 1]
        cn = Ceval[ind]
        alpha = np.arccos((cb4 + caf) / (2 * cn))
        alsi = np.sin(alpha)
        tau = -(np.arctan((cb4 - caf) / (2 * cn * alsi)) / alpha)
        if abs(tau) > .5:
            msg = ('subsample failing, more than .5 sample shift predicted')
            detex.log(__name__, msg, level='warning', pri=True)
            return ind
    return tau
Example #12
def _ensureUnique(cx, cxdf):
    """
    Make sure each coefficient is unique so it can be used as a key to
    reference time lags; if not unique, perturb slightly
    """
    se = pd.Series(cx)
    dups = se[se.duplicated()]
    count = 0
    while len(dups) > 0:
        msg = ('Duplicates found in correlation coefficients,'
               'perturbing slightly to get unique values')
        detex.log(__name__, msg, level='warning', pri=True)
        for a in dups.iteritems():
            se[a[0]] = a[1] - abs(.00001 * np.random.rand())
        count += 1
        dups = se[se.duplicated()]
        if count > 10:
            msg = 'cannot make coefficients unique, killing program'
            detex.log(__name__, msg, level='error')
    if count > 1:  # if the cx has been perturbed update cxdf
        for a in range(len(cxdf)):
            sindex = sum(pd.isnull(cxdf.iloc[a]))  # leading NaNs in row a
            tri1 = _triangular(len(cxdf))
            tri2 = _triangular(len(cxdf) - a)
            tri3 = _triangular(len(cxdf) - (a + 1))
            cxdf.values[a, sindex:] = cx[tri1 - tri2: tri1 - tri3]
    return se.values, cxdf
Example #13
def writeKMLFromStationKey(df='StationKey.csv', outname='stations.kml'):
    """
    Write a KML file from a station key

    Parameters
    -------------
    df : str or pandas DataFrame
        If str then the path to the station key. If DataFrame then the
        station key loaded with the readKey function with key_type='station'
    outname : str
        name of the kml file
    """
    if isinstance(df, string_types):
        df = pd.read_csv(df)
    elif not isinstance(df, pd.DataFrame):
        msg = ('Input type not understood, must be path to station key or '
               'dataframe of station key')
        detex.log(__name__, msg, level='error')
    kml = simplekml.Kml(open=1)
    for a in df.iterrows():
        pnt = kml.newpoint()
        pnt.name = str(a[1].STATION)
        pnt.coords = [(a[1].LON, a[1].LAT)]
        # print(a[1].STATION,a[1].LON,a[1].LAT)
    kml.save(outname)
Example #14
def _checkClusterInputs(filt, dtype, trim, decimate):
    """
    Check a few key input parameters to make sure everything is kosher
    """
    if filt is not None and len(filt) != 4:  # check filt
        msg = 'filt must either be None (no filter) or a len 4 list or tuple'
        detex.log(__name__, msg, level='error')

    if dtype != 'double' and dtype != 'single':  # check dtype
        msg = ('dtype must be either "double" or "single" not %s, '
               'setting to double' % dtype)
        dtype = 'double'
        detex.log(__name__, msg, level='warn', pri=True)

    if trim is not None:  # check trim
        if len(trim) != 2:
            msg = 'Trim must be a list or tuple of length 2'
            detex.log(__name__, msg, level='warn', pri=True)
        else:
            if -trim[0] > trim[1]:
                msg = 'Invalid trim parameters'
                detex.log(__name__, msg, level='error')

    if decimate is not None:
        if not isinstance(decimate, int):
            msg = 'decimate must be an int'
            detex.log(__name__, msg, level='error', e=TypeError)
Example #15
def _removeInstrumentResposne(st, prefilt, opType):
    st.detrend('linear')  # detrend
    st = _fftprep(st)
    try:
        st.remove_response(output=opType, pre_filt=prefilt)
    except:
        detex.log(__name__, 'RemoveResponse Failed for %s,%s, not saving' %
                  (st[0].stats.network, st[0].stats.station), level='warning')
        st = False
    return st
Example #16
    def _CreateCoeffArray(self, corSeries, name, threshold, sta, offsets, mags,
                          ewf, MPcon, events, ssTD, WFU, UtU):
        """
        function to create an array of results for each detection, including
        time of detection, estimated magnitude, etc. 
        """
        dpv = 0
        cols = ['DS', 'DS_STALTA', 'STMP', 'Name', 'Sta', 'MSTAMPmin',
                'MSTAMPmax', 'Mag', 'SNR', 'ProEnMag']
        sr = corSeries.SampRate  # sample rate
        start = corSeries.TimeStamp  # start time of data block

        # set array to evaluate for successful triggers
        if self.trigCon == 0:
            Ceval = corSeries.SSdetect.copy()
        elif self.trigCon == 1:
            Ceval = corSeries.STALTA.copy()
        Sar = pd.DataFrame(columns=cols)
        count = 0
        # while there are any values in the det stat. vect that exceed thresh. 
        while Ceval.max() >= threshold[name]:
            trigIndex = Ceval.argmax()
            coef = corSeries.SSdetect[trigIndex]
            times = float(trigIndex) / sr + start
            if self.fillZeros:  # if zeros are being filled dont try STA/LTA
                SLValue = 0.0
            else:
                try:
                    SLValue = corSeries.STALTA[trigIndex]
                except TypeError:
                    SLValue = 0.0
            Ceval = self._downPlayArrayAroundMax(Ceval, sr, dpv)
            # estimate mags else return NaNs as mag estimates
            if self.estimateMags:  # estimate magnitudes
                M1, M2, SNR = self._estMag(trigIndex, corSeries, MPcon,
                                           mags[name], events[name], WFU[name],
                                           UtU[name], ewf[name], coef, times,
                                           name, sta)
                peMag, stMag = M1, M2
            else:
                peMag, stMag, SNR = np.NaN, np.NaN, np.NaN

            # kill switch to prevent infinite loop (just in case)
            if count > 4000:
                msg = (('over 4000 events found in single data block on %s '
                        'for %s around %s') % (sta, name, times))
                detex.log(__name__, msg, level='error')

            # get predicted origin time ranges
            minof = np.min(offsets[name])
            maxof = np.max(offsets[name])
            MSTAMPmax, MSTAMPmin = times - minof, times - maxof
            Sar.loc[count] = [coef, SLValue, times, name, sta, MSTAMPmin,
                              MSTAMPmax, stMag, SNR, peMag]
            count += 1
        return Sar
Example #17
def _loadDirectoryData(fet, start, end, net, sta, chan, loc):
    """
    Function to load continuous data from the detex directory structure

    """
    # get times with slight buffer
    t1 = obspy.UTCDateTime(start).timestamp
    t2 = obspy.UTCDateTime(end).timestamp
    buf = 3 * fet.conDatDuration
    dfind = _loadIndexDb(fet.directoryName, net + '.' + sta, t1 - buf, t2 + buf)

    if dfind is None:
        t1p = obspy.UTCDateTime(t1)
        t2p = obspy.UTCDateTime(t2)
        msg = 'data from %s to %s on %s not found in %s' % (t1p, t2p, sta,
                                                            fet.directoryName)
        detex.log(__name__, msg, level='warning', pri=False)
        return None
    # define conditions in which condata should not be loaded
    # con1 and con2 - No overlap (other than 10%)
    tra = t2 - t1  # time range
    con1 = ((dfind.Starttime <= t1) & (dfind.Endtime - tra * .1 < t1) &
            (dfind.Starttime < t2) & (dfind.Endtime < t2))
    con2 = ((dfind.Starttime > t1) & (dfind.Endtime > t1) &
            (dfind.Starttime + tra * .1 > t2) & (dfind.Endtime >= t2))
    df = dfind[~(con1 | con2)]

    if len(df) < 1:
        t1p = obspy.UTCDateTime(t1)
        t2p = obspy.UTCDateTime(t2)
        msg = 'data from %s to %s on %s not found in %s' % (t1p, t2p, sta,
                                                            fet.directoryName)
        detex.log(__name__, msg, level='warning', pri=False)
        return None

    st = obspy.core.Stream()

    if len(df.Path) < 1:  # if no event fits description
        return None
    for path, fname in zip(df.Path, df.FileName):
        fil = os.path.join(path, fname)
        st1 = read(fil)
        if not st1 is None:
            st += st1
    # st.trim(starttime=start, endtime=end)
    # check if chan variable is string else iterate
    if isinstance(chan, string_types):
        stout = st.select(channel=chan)
    else:
        stout = obspy.core.Stream()
        for cha in chan:
            stout += st.select(channel=cha)

    loc = '*' if loc in ['???', '??'] else loc  # convert ? to *
    stout = stout.select(location=loc)
    return stout
Example #18
def get_number_channels(st):
    """
    Take an obspy stream and get the number of unique channels in stream
    (stream must have only one station)
    """
    if len(set([x.stats.station for x in st])) > 1:
        msg = 'function only takes streams with exactly 1 station'
        detex.log(__name__, msg, level='error')
    nc = len(list(set([x.stats.channel for x in st])))
    return nc
Example #19
def get_number_channels(st):
    """
    Take an obspy stream and get the number of unique channels in stream
    (stream must have only one station)
    """
    if len(set([x.stats.station for x in st])) > 1:
        msg = 'function only takes streams with exactly 1 station'
        detex.log(__name__, msg, level='error')
    nc = len(list(set([x.stats.channel for x in st])))
    return nc
Example #20
def _getTemData(templatekey, stakey, templateDir, formatout, removeResponse,
                prefilt, client, timeBeforeOrigin, timeAfterOrigin, loc,
                opType):
    HD = os.getcwd()  # Get current directory
    client = Client(client)
    for temkey in templatekey.iterrows():  # iterate over template key rows
        eventID = temkey[1]['NAME']
        SK = stakey
        SK = SK[[not np.isnan(x) for x in SK.LAT]]
        try:
            oTime = obspy.core.UTCDateTime(temkey[1]['TIME'])  # origin time
        except:
            detex.log(__name__,
                      '%s is a bad entry in TIME column for event %s' %
                      (temkey[1]['TIME'], temkey[1]['NAME']),
                      level='error')
            raise Exception('%s is a bad entry in TIME column for event %s' %
                            (temkey[1]['TIME'], temkey[1]['NAME']))
        utcStart = oTime - timeBeforeOrigin  # start time is event origin time minus timeBeforeOrigin
        utcEnd = oTime + timeAfterOrigin  # end time is event origin time plus timeafterOrigin
        for sk in SK.iterrows():  # iterate through each row of station keys
            chans = sk[1]['CHANNELS'].replace('-', ',')
            net = sk[1]['NETWORK']
            sta = sk[1]['STATION']
            st = True
            UTCstr = '%04d-%03dT%02d-%02d-%02d' % (oTime.year, oTime.julday,
                                                   oTime.hour, oTime.minute,
                                                   oTime.second)
            sdir = os.path.join(HD, templateDir, eventID).replace(' ', '')
            svar = ('%s.%s.%s.pkl' % (net, sta, UTCstr)).replace(' ', '')
            if os.path.isfile(os.path.join(sdir, svar)):
                # if the file already exists skip the download
                # detex.log(__name__, '%s already exists' % svar)
                st = False
            if st != False:
                st = _tryDownloadData(net, sta, chans, loc, utcStart, utcEnd,
                                      client)
                if st != False:  #_tryDownloadData can return false, second st check is needed
                    if (temkey[0] + 1) % 25 == 0:
                        detex.log(
                            __name__,
                            '%d events downloaded out of %d for Station %s' %
                            (temkey[0] + 1, len(templatekey), sk[1].STATION),
                            pri=True)
                    if removeResponse == True:
                        st = _removeInstrumentResposne(st, prefilt, opType)
                    if st != False:
                        if not os.path.isdir(sdir):
                            # create waveform subdirectory if it does not exist
                            os.makedirs(sdir)
                        st.write(os.path.join(sdir, svar), formatout)
Example #21
    def _estMag(self, trigIndex, corSeries, MPcon, mags, events,
                WFU, UtU, ewf, coef, times, name, sta):
        """
        Estimate magnitudes by applying projected subspace mag estimates 
        and standard deviation mag estimates as outlined in Chambers et al. 
        2015.
        """
        WFlen = np.shape(WFU)[1]  # event waveform length
        nc = corSeries.Nc  # number of chans
        # continuous data chunk that triggered  subspace
        ConDat = MPcon[trigIndex * nc:trigIndex * nc + WFlen]
        if self.issubspace:
            # continuous data chunk projected into subspace
            ssCon = np.dot(UtU, ConDat)
            # projected energy
            proEn = np.var(ssCon) / np.var(WFU, axis=1)

        # Try and estimate pre-event noise level (for estimating SNR)
        if trigIndex * nc > 5 * WFlen:  # take 5x waveform length before event
            pe = MPcon[trigIndex * nc - 5 * WFlen: trigIndex * nc]
            rollingstd = pd.rolling_std(pe, WFlen)[WFlen - 1:]
        else:  # if not enough data take 6x the waveform length after the event
            pe = MPcon[trigIndex * nc: trigIndex * nc + WFlen + 6 * WFlen]
            rollingstd = pd.rolling_std(pe, WFlen)[WFlen - 1:]
        baseNoise = np.median(rollingstd)  # take median of std for noise level
        SNR = np.std(ConDat) / baseNoise  # estimate SNR

        # ensure mags are greater than -15, else assume no mag value for event
        touse = mags > -15
        if self.issubspace:  # if subspace
            if not any(touse):  # if no defined magnitudes available
                msg = (('No magnitudes above -15 usable for detection at %s on'
                        ' station %s and %s') % (times, sta, name))
                detex.log(__name__, msg, level='warn')
                return np.NaN, np.NaN, SNR
            else:
                # correlation coefs between each event and data block
                ecor = [fast_normcorr(x, ConDat)[0] for x in ewf]
                eventCors = np.array(ecor)
                projectedEnergyMags = _estPEMag(mags, proEn, eventCors, touse)
                stdMags = _estSTDMag(mags, ConDat, ewf, eventCors, touse)
        else:  # if singleton
            assert len(mags) == 1
            if np.isnan(mags[0]) or mags[0] < -15:
                projectedEnergyMags = np.NaN
                stdMags = np.NaN
            else:
                # use simple waveform scaling if single
                d1 = np.dot(ConDat, WFU[0])
                d2 = np.dot(WFU[0], WFU[0])
                projectedEnergyMags = mags[0] + d1 / d2
                stdMags = mags[0] + np.log10(np.std(ConDat) / np.std(WFU[0]))
        return projectedEnergyMags, stdMags, SNR
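A small arithmetic sketch (not from the detex source) of the standard-deviation magnitude used in the singleton branch above: the estimate adds log10 of the amplitude ratio between the detected data chunk and the template waveform, so a detection ten times larger than a magnitude 2.0 template comes out near magnitude 3.0.

import numpy as np

template = np.sin(np.linspace(0, 20, 500))   # stand-in template waveform
detected = 10.0 * template                   # detection with 10x the amplitude
template_mag = 2.0
std_mag = template_mag + np.log10(np.std(detected) / np.std(template))
print(std_mag)                               # ~3.0, one magnitude unit higher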
Example #22
    def _estMag(self, trigIndex, corSeries, MPcon, mags, events, WFU, UtU, ewf,
                coef, times, name, sta):
        """
        Estimate magnitudes by applying projected subspace mag estimates 
        and standard deviation mag estimates as outlined in Chambers et al. 
        2015.
        """
        WFlen = np.shape(WFU)[1]  # event waveform length
        nc = corSeries.Nc  # number of chans
        # continuous data chunk that triggered  subspace
        ConDat = MPcon[trigIndex * nc:trigIndex * nc + WFlen]
        if self.issubspace:
            # continuous data chunk projected into subspace
            ssCon = np.dot(UtU, ConDat)
            # projected energy
            proEn = np.var(ssCon) / np.var(WFU, axis=1)

        # Try and estimate pre-event noise level (for estimating SNR)
        if trigIndex * nc > 5 * WFlen:  # take 5x waveform length before event
            pe = MPcon[trigIndex * nc - 5 * WFlen:trigIndex * nc]
            rollingstd = pd.rolling_std(pe, WFlen)[WFlen - 1:]
        else:  # if not enough data take 6x the waveform length after the event
            pe = MPcon[trigIndex * nc: trigIndex * nc + WFlen + 6 * WFlen]
            rollingstd = pd.rolling_std(pe, WFlen)[WFlen - 1:]
        baseNoise = np.median(rollingstd)  # take median of std for noise level
        SNR = np.std(ConDat) / baseNoise  # estimate SNR

        # ensure mags are greater than -15, else assume no mag value for event
        touse = mags > -15
        if self.issubspace:  # if subspace
            if not any(touse):  # if no defined magnitudes available
                msg = (('No magnitudes above -15 usable for detection at %s on'
                        ' station %s and %s') % (times, sta, name))
                detex.log(__name__, msg, level='warn')
                return np.NaN, np.NaN, SNR
            else:
                # correlation coefs between each event and data block
                ecor = [fast_normcorr(x, ConDat)[0] for x in ewf]
                eventCors = np.array(ecor)
                projectedEnergyMags = _estPEMag(mags, proEn, eventCors, touse)
                stdMags = _estSTDMag(mags, ConDat, ewf, eventCors, touse)
        else:  # if singleton
            assert len(mags) == 1
            if np.isnan(mags[0]) or mags[0] < -15:
                projectedEnergyMags = np.NaN
                stdMags = np.NaN
            else:
                # use simple waveform scaling if single
                d1 = np.dot(ConDat, WFU[0])
                d2 = np.dot(WFU[0], WFU[0])
                projectedEnergyMags = mags[0] + d1 / d2
                stdMags = mags[0] + np.log10(np.std(ConDat) / np.std(WFU[0]))
        return projectedEnergyMags, stdMags, SNR
Example #23
def _removeInstrumentResposne(st, prefilt, opType):
    st.detrend('linear')  # detrend
    st = _fftprep(st)
    try:
        st.remove_response(output=opType, pre_filt=prefilt)
    except:
        detex.log(__name__,
                  'RemoveResponse Failed for %s,%s, not saving' %
                  (st[0].stats.network, st[0].stats.station),
                  level='warning')
        st = False
    return st
Example #24
def _getConDataStation(sk, HD, ConDir, removeResponse, prefilt, opType,
                       formatout, secBuf, loc, client, reverse):
    chans = sk[1]['CHANNELS'].replace('-', ',')
    net = sk[1]['NETWORK']
    sta = sk[1]['STATION']
    utcStart = obspy.core.UTCDateTime(sk[1]['STARTTIME'])
    utcStart = utcStart - utcStart.timestamp % 3600  # get time to nearest hour
    utcEnd = obspy.core.UTCDateTime(sk[1]['ENDTIME'])
    utcEnd = utcEnd - utcEnd.timestamp % 3600  # get time to nearest hour
    # get array with start/stop times by hour
    timeArray = [utcStart + x * 3600 for x in
                 range(int((utcEnd.timestamp - utcStart.timestamp) / 3600) + 1)]
    if reverse:
        timeArray = timeArray[::-1]
    for t in range(len(timeArray) - 1):
        if reverse:
            oTime = timeArray[t + 1]
        else:
            oTime = timeArray[t]
        st = True
        UTCstr = '%04d-%03dT%02d' % (oTime.year, oTime.julday, oTime.hour)
        sdir = os.path.join(HD, ConDir, net + '.' + sta, str(oTime.year),
                            "%03d" %
                            (oTime.julday))  # Save directory for current loop
        svar = ('%s.%s.%s.pkl' % (net, sta, UTCstr))
        if os.path.isfile(os.path.join(sdir, svar)):
            # if the file already exists skip the download
            # detex.log(__name__, '%s already exists' % svar)
            st = False
        if st != False:
            if reverse:
                st = _tryDownloadData(net, sta, chans, loc, timeArray[t + 1],
                                      timeArray[t] + secBuf, client)
            else:
                st = _tryDownloadData(net, sta, chans, loc, timeArray[t],
                                      timeArray[t + 1] + secBuf, client)
            if st != False:
                if removeResponse == True:
                    st = _removeInstrumentResposne(st, prefilt, opType)
                if not os.path.isdir(sdir):
                    # create waveform subdirectory if it does not exist
                    os.makedirs(sdir)
                if st != False:
                    try:
                        st.write(os.path.join(sdir, svar), formatout)
                    except:
                        detex.log(__name__,
                                  'writing %s in %s failed' % (svar, sdir),
                                  level='warning')
Example #25
def _assignClientFunction(client):
    """
    function to take an obspy client (FDSN, NEIC, EW, etc.) and return the
    correct loadFromClient function for getting data.
    """
    if isinstance(client, obspy.clients.fdsn.Client):
        return _loadFromFDSN
    elif isinstance(client, obspy.clients.neic.Client):
        return _loadFromNEIC
    elif isinstance(client, obspy.clients.earthworm.Client):
        return _loadFromEarthworm
    else:
        msg = 'Client type not supported'
        detex.log(__name__, msg, level='error', e=TypeError)
Example #26
def read(path):
    """
    function to read a file from a path. If IOError or TypeError is raised,
    try again with os.path.sep prepended to the path
    """
    try:
        st = obspy.read(path)
    except (IOError, TypeError):
        try:
            st = obspy.read(os.path.join(os.path.sep, path))
        except (IOError, TypeError):
            msg = 'Cannot read %s, the file may be corrupt, skipping it' % path
            detex.log(__name__, msg, level='warn', pri=True)
            return None
    return st
Example #27
def _removeInstrumentResponse(fet, st):
    if not fet.removeResponse:  # pass stream back if no response removal
        return st
    st.detrend('linear')  # detrend
    st = _fftprep(st)
    try:
        st.remove_response(output=fet.opType, pre_filt=fet.prefilt)
    except:
        utc1 = str(st[0].stats.starttime).split('.')[0]
        utc2 = str(st[0].stats.endtime).split('.')[0]
        msg = 'RemoveResponse Failed for %s,%s, from %s to %s, skipping' % (
            st[0].stats.network, st[0].stats.station, utc1, utc2)
        detex.log(__name__, msg, level='warning', pri=True)
        st = None
    return st
Example #28
def _tryDownloadData(net, sta, chan, loc, utcStart, utcEnd,
                     client):  # get data, return False if fail
    try:
        st = client.get_waveforms(net, sta, loc, chan, utcStart, utcEnd,
                                  attach_response=True)
        return st
    except:
        detex.log(
            __name__, 'Download failed for %s.%s %s from %s to %s' %
            (net, sta, chan, str(utcStart), str(utcEnd)))
        return False
Example #29
    def __init__(self, method, client=None, removeResponse=True,
                 inventoryArg=None, directoryName=None, opType='VEL',
                 prefilt=[.05, .1, 15, 20], conDatDuration=3600, conBuff=120,
                 timeBeforeOrigin=1 * 60, timeAfterOrigin=4 * 60, checkData=True,
                 fillZeros=False):

        self.__dict__.update(locals())  # Instantiate all inputs
        self.inventory = _getInventory(inventoryArg)
        self._checkInputs()

        if self.removeResponse and self.inventory is None:
            if self.method == 'dir':
                msg = ('Cannot remove response without a valid inventoryArg, '
                       'setting removeResponse to False')
                detex.log(__name__, msg, level='warning', pri=True)
                self.removeResponse = False
Example #30
def _getSampleRates(df):
    """
    Function to get the sample rates on the main detex DataFrame
    """
    if isinstance(df, pd.DataFrame):
        row = df.iloc[0]
    else:
        row = df
    srs = set([row.Stats[x]['sampling_rate'] for x in row.Events])
    # make sure all sampling rates are the same else error out
    if len(srs) > 1:
        msg = ('Not all samp rates equal for all events on %s, skipping '
               'subspace or singles %s') % (row.Station, df.Names.values)
        detex.log(__name__, msg, level='warn', pri=True)
        return None
    return list(srs)
Example #31
def _getSampleRates(df):
    """
    Function to get the sample rates on the main detex DataFrame
    """
    if isinstance(df, pd.DataFrame):
        row = df.iloc[0]
    else:
        row = df
    srs = set([row.Stats[x]['sampling_rate'] for x in row.Events])
    # make sure all sampling rates are the same else error out
    if len(srs) > 1:
        msg = ('Not all samp rates equal for all events on %s, skipping '
               'subspace or singles %s') % (row.Station, df.Names.values)
        detex.log(__name__, msg, level='warn', pri=True)
        return None
    return list(srs)
Example #32
def readKey(dfkey, key_type='template'):
    """
    Read a template key csv and perform checks for required columns
    Parameters
    ---------
    dfkey : str or pandas DataFrame
        A path to the template key csv or the DataFrame itself
    key_type : str
        "template" for template key or "station" for station key
    Returns
    --------
    A pandas DataFrame if required columns exist, else raise Exception

    """
    # key types and required columns

    key_types = req_columns.keys()

    if key_type not in key_types:
        msg = "unsported key type, supported types are %s" % (key_types)
        detex.log(__name__, msg, level='error')

    if isinstance(dfkey, string_types):
        if not os.path.exists(dfkey):
            msg = '%s does not exist, check path' % dfkey
            detex.log(__name__, msg, level='error')
        else:
            df = pd.read_csv(dfkey)
    elif isinstance(dfkey, pd.DataFrame):
        df = dfkey
    else:
        msg = 'Data type of dfkey not understood'
        detex.log(__name__, msg, level='error')

    # Check required columns
    if not req_columns[key_type].issubset(df.columns):
        msg = (
            'Required columns not in %s, required columns for %s key are %s' %
            (df.columns, key_type, req_columns[key_type]))
        detex.log(__name__, msg, level='error')

    tdf = df.loc[:, list(req_columns[key_type])]
    condition = [
        all([x != '' for item, x in row.iteritems()])
        for num, row in tdf.iterrows()
    ]
    df = df[condition]

    # TODO if column TIME is utcDateTime object sorting fails, fix this
    df.sort_values(by=list(req_columns[key_type]), inplace=True)
    df.reset_index(drop=True, inplace=True)

    # specific operations for various key types
    if key_type == 'station':
        df['STATION'] = [str(x) for x in df['STATION']]
        df['NETWORK'] = [str(x) for x in df['NETWORK']]
    return df
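The module-level req_columns dict is assumed by readKey but not shown in this listing. A purely illustrative guess at its shape (the real detex definition may differ): a mapping from key type to the set of columns that must be present.

# hypothetical stand-in for the module-level definition used by readKey
req_columns = {
    'template': {'NAME', 'TIME', 'LAT', 'LON', 'MAG', 'DEPTH'},
    'station': {'NETWORK', 'STATION', 'STARTTIME', 'ENDTIME',
                'LAT', 'LON', 'ELEVATION', 'CHANNELS'},
}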
Example #33
def loadClusters(filename='clust.pkl'):
    """
    Function that uses pandas.read_pickle to load a pickled cluster
    (instance of detex.subspace.ClusterStream)
    Parameters
    ----------
    filename : str
        Path to the saved cluster instance
    Returns
    ----------
    An instance of detex.subspace.ClusterStream
    """
    cl = pd.read_pickle(filename)
    if not isinstance(cl, detex.subspace.ClusterStream):
        msg = '%s is not a ClusterStream instance' % filename
        detex.log(__name__, msg, level='error')
    return cl
Example #34
def loadSubSpace(filename='subspace.pkl'):
    """
    Function that uses pandas.read_pickle to load a pickled subspace
    (instance of detex.subspace.SubSpaceStream)
    Parameters
    ----------
    filename : str
        Path to the saved subspace instance
    Returns
    ----------
    An instance of detex.subspace.SubSpaceStream
    """
    ss = pd.read_pickle(filename)
    if not isinstance(ss, detex.subspace.SubSpace):
        msg = '%s is not a SubSpaceStream instance' % filename
        detex.log(__name__, msg, level='error')
    return ss
Example #35
def _getChannels(df):
    """
    Function to get the channels on the main detex DataFrame
    """
    if isinstance(df, pd.DataFrame):
        row = df.iloc[0]
    else:
        row = df
    chansAr = np.array(row.Channels.values())
    chans = set([x for x in chansAr.flat])
    # make sure all channels are the same for each event
    if not all([chans == set(x) for x in row.Channels.values()]):
        msg = ('Not all channels are the same for all events on %s, skipping '
               'subspace or singles %s') % (row.Station, df.Names.values)
        detex.log(__name__, msg, level='warning', pri=True)
        return None
    return list(chans)
Example #36
def loadClusters(filename='clust.pkl'):
    """
    Function that uses pandas.read_pickle to load a pickled cluster
    (instance of detex.subspace.ClusterStream)
    Parameters
    ----------
    filename : str
        Path to the saved cluster instance
    Returns
    ----------
    An instance of detex.subspace.ClusterStream 
    """
    cl = pd.read_pickle(filename)
    if not isinstance(cl, detex.subspace.ClusterStream):
        msg = '%s is not a ClusterStream instance' % filename
        detex.log(__name__, msg, level='error')
    return cl
Example #37
def loadSubSpace(filename='subspace.pkl'):
    """
    Function that uses pandas.read_pickle to load a pickled subspace
    (instance of detex.subspace.SubSpaceStream)
    Parameters
    ----------
    filename : str
        Path to the saved subspace instance
    Returns
    ----------
    An instance of detex.subspace.SubSpaceStream 
    """
    ss = pd.read_pickle(filename)
    if not isinstance(ss, detex.subspace.SubSpace):
        msg = '%s is not a SubSpaceStream instance' % filename
        detex.log(__name__, msg, level='error')
    return ss
Example #38
def _getChannels(df):
    """
    Function to get the channels on the main detex DataFrame
    """
    if isinstance(df, pd.DataFrame):
        row = df.iloc[0]
    else:
        row = df
    chansAr = np.array(row.Channels.values())
    chans = set([x for x in chansAr.flat])
    # make sure all channels are the same for each event
    if not all([chans == set(x) for x in row.Channels.values()]):
        msg = ('Not all channels are the same for all events on %s, skipping '
               'subspace or singles %s') % (row.Station, df.Names.values)
        detex.log(__name__, msg, level='warning', pri=True)
        return None
    return list(chans)
Example #39
def _loadFromEarthworm(fet, start, end, net, sta, chan, loc):
    client = fet.client
    startstr = str(start)
    endstr = str(end)
    st = obspy.Stream()
    if '*' in loc or '?' in loc:  # adjust for earthworm loc codes
        loc = '--'
    for cha in chan:
        try:  # try earthworm client
            st += client.get_waveforms(net, sta, loc, cha, start, end)
        except:

            msg = ('Could not fetch data on %s from %s to %s' %
                   (net + '.' + sta, startstr, endstr))
            detex.log(__name__, msg, level='warning', pri=False)
            st = None
    return st
Example #40
def _getDSVect(fetcher, stakey, utc1, utc2, filt, deci, dtype, conDatNum, Nc,
               reqlen, sta, lta, ssArrayTD, ssArrayFD, limit=None):
    # get a generator to return streams, ask for 4x more for rejects
    stgen = fetcher.getConData(stakey,
                               utcstart=utc1,
                               utcend=utc2,
                               randSamps=conDatNum * 4)
    count = 0  # normal count
    scount = 0  # success count
    DSmat = []
    for st in stgen:  # loop over random samps of continuous data
        if st is None or len(st) < 1:
            continue  # no need to log, fetcher will do it
        count += 1
        st = detex.construct._applyFilter(st, filt, deci, dtype)
        if st is None or len(st) < 1:
            continue  # no need to log, fetcher will do it
        passSTALTA = _checkSTALTA(st, filt, sta, lta, limit)
        if not passSTALTA:
            continue
        if scount >= conDatNum:  # if we have all we need
            break
        mpCon = detex.construct.multiplex(st, Nc)
        dsVect = _MPXSSCorr(mpCon, reqlen, ssArrayTD, ssArrayFD, Nc)
        DSmat.append(dsVect)
        scount += 1
    if count == 0:
        msg = 'Could not get any data for %s' % (stakey.STATION.iloc[0])
        detex.log(__name__, msg, level='error')
    return DSmat, count, scount
Example #41
def _loadFromNEIC(fet, start, end, net, sta, chan, loc):
    """
    Use obspy.neic.Client to fetch waveforms
    """
    client = fet.client
    # str reps of utc objects for error messages
    startstr = str(start)
    endstr = str(end)
    st = obspy.Stream()
    for cha in chan:
        try:  # try neic client
            st += client.get_waveforms(net, sta, loc, cha, start, end)
        except:
            msg = ('Could not fetch data on %s from %s to %s' %
                   (net + '.' + sta, startstr, endstr))
            detex.log(__name__, msg, level='warning', pri=False)
            st = None
    return st
Example #42
def readKey(dfkey, key_type='template'):
    """
    Read a template key csv and perform checks for required columns
    Parameters
    ---------
    dfkey : str or pandas DataFrame
        A path to the template key csv or the DataFrame itself
    key_type : str
        "template" for template key or "station" for station key
    Returns
    --------
    A pandas DataFrame if required columns exist, else raise Exception

    """
    # key types and required columns

    key_types = req_columns.keys()

    if key_type not in key_types:
        msg = "unsported key type, supported types are %s" % (key_types)
        detex.log(__name__, msg, level='error')

    if isinstance(dfkey, string_types):
        if not os.path.exists(dfkey):
            msg = '%s does not exist, check path' % dfkey
            detex.log(__name__, msg, level='error')
        else:
            df = pd.read_csv(dfkey)
    elif isinstance(dfkey, pd.DataFrame):
        df = dfkey
    else:
        msg = 'Data type of dfkey not understood'
        detex.log(__name__, msg, level='error')

    # Check required columns
    if not req_columns[key_type].issubset(df.columns):
        msg = ('Required columns not in %s, required columns for %s key are %s'
               % (df.columns, key_type, req_columns[key_type]))
        detex.log(__name__, msg, level='error')

    tdf = df.loc[:, list(req_columns[key_type])]
    condition = [all([x != '' for item, x in row.iteritems()])
                 for num, row in tdf.iterrows()]
    df = df[condition]

    # TODO if column TIME is utcDateTime object sorting fails, fix this
    df.sort_values(by=list(req_columns[key_type]), inplace=True)
    df.reset_index(drop=True, inplace=True)

    # specific operations for various key types
    if key_type == 'station':
        df['STATION'] = [str(x) for x in df['STATION']]
        df['NETWORK'] = [str(x) for x in df['NETWORK']]
    return df
Example #43
def _applyFilter(st, filt, decimate=False, dtype='double', fillZeros=False):
    """
    Apply a filter, decimate, and trim to even start/end times 
    """
    if st is None or len(st) < 1:
        msg = '_applyFilter got a stream with 0 length'
        detex.log(__name__, msg, level='warn')
        return obspy.Stream()
    st.sort()
    st1 = st.copy()
    if dtype == 'single':  # cast into single
        for num, tr in enumerate(st):
            st[num].data = tr.data.astype(np.float32)
    nc = list(set([x.stats.channel for x in st]))
    if len(st) > len(nc):  # if data is fragmented only keep largest chunk
        if fillZeros:
            st = _mergeChannelsFill(st)
        else:
            st = _mergeChannels(st)
    if not len(st) == len(nc) or len(st) < 1:
        sta = st1[0].stats.station
        stime = str(st1[0].stats.starttime)
        msg = 'Stream is too fractured around %s on %s' % (str(stime), sta)
        detex.log(__name__, msg, level='warn')
        return obspy.Stream()
        # st1.write('failed_merge-%s-%s.pkl'%(sta, stime), 'pickle')
        # assert len(st) == len(nc)
    if decimate:
        st.decimate(decimate)

    startTrim = max([x.stats.starttime for x in st])
    endTrim = min([x.stats.endtime for x in st])
    if startTrim > endTrim:  # return an empty stream if chans don't overlap
        return obspy.Stream()
    st.trim(starttime=startTrim, endtime=endTrim)
    st = st.split()
    st.detrend('linear')
    if isinstance(filt, list) or isinstance(filt, tuple):
        st.filter('bandpass',
                  freqmin=filt[0],
                  freqmax=filt[1],
                  corners=filt[2],
                  zerophase=filt[3])
    return st
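A minimal usage sketch (not from the detex source), run on obspy's bundled example stream: a 1-10 Hz band-pass with 2 corners and zero-phase filtering, no decimation.

import obspy

st = obspy.read()                  # obspy's three-channel example stream
st_filt = _applyFilter(st.copy(), filt=[1.0, 10.0, 2, True])
print(st_filt)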
Example #44
def _readVeriFile(veriFile):
    try:
        df = pd.read_csv(veriFile)
    except Exception:
        try:
            df = pd.read_pickle(veriFile)
        except Exception:
            try:
                df = detex.util.loadSQLite(veriFile, 'verify')
            except Exception:
                msg = ('%s could not be read, it must either be csv, pickled '
                       'dataframe or sqlite database') % veriFile
                detex.log(__name__, msg, level='error')
    reqcols = ['TIME', 'LAT', 'LON', 'MAG', 'DEPTH', 'NAME']  # required cols
    if not set(reqcols).issubset(df.columns):
        msg = ('%s does not have the required columns, it needs '
               'TIME,LAT,LON,MAG,DEPTH,NAME') % veriFile
        detex.log(__name__, msg, level='error')
    return df
Example #45
def _readVeriFile(veriFile):
    try:
        df = pd.read_csv(veriFile)
    except Exception:
        try:
            df = pd.read_pickle(veriFile)
        except Exception:
            try:
                df = detex.util.loadSQLite(veriFile, 'verify')
            except Exception:
                msg = ('%s could not be read, it must either be csv, pickled '
                       'dataframe or sqlite database') % veriFile
                detex.log(__name__, msg, level='error')
    reqcols = ['TIME', 'LAT', 'LON', 'MAG', 'DEPTH', 'NAME']  # required cols
    if not set(reqcols).issubset(df.columns):
        msg = ('%s does not have the required columns, it needs '
               'TIME,LAT,LON,MAG,DEPTH,NAME') % veriFile
        detex.log(__name__, msg, level='error')
    return df
Example #46
def _alignTD(delayDF, srow):
    """
    loop through the delay DataFrame and apply offsets to create a dictionary
    of aligned arrays
    """
    aligned = {}
    # find the required length for each aligned stream
    TDlengths = len(srow.MPtd[delayDF.Events[0]]) - max(delayDF.SampleDelays)
    for ind, row in delayDF.iterrows():
        orig = srow.MPtd[row.Events]
        orig = orig[row.SampleDelays:]
        orig = orig[:TDlengths]
        aligned[row.Events] = orig
        if len(orig) == 0:
            msg = ('Alignment of multiplexed stream failing on %s, '
                   'try raising ccreq or widening trim window' % srow.Station)
            msg2 = _idAlignProblems(delayDF)
            detex.log(__name__, msg + msg2, level='error')
    return aligned
Example #47
def _alignTD(delayDF, srow):
    """
    loop through the delay DataFrame and apply offsets to create a dictionary
    of aligned arrays
    """
    aligned = {}
    # find the required length for each aligned stream
    TDlengths = len(srow.MPtd[delayDF.Events[0]]) - max(delayDF.SampleDelays)
    for ind, row in delayDF.iterrows():
        orig = srow.MPtd[row.Events]
        orig = orig[row.SampleDelays:]
        orig = orig[:TDlengths]
        aligned[row.Events] = orig
        if len(orig) == 0:
            msg = ('Alignment of multiplexed stream failing on %s, '
                   'try raising ccreq or widening trim window' % srow.Station)
            msg2 = _idAlignProblems(delayDF)
            detex.log(__name__, msg + msg2, level='error')
    return aligned
Example #48
def _CCX2(mpfd1, mpfd2, mptd1, mptd2, Nc1, Nc2):
    """
    Function to find the max correlation coefficient and the corresponding lag
    time between 2 traces. The ffts should already have been calculated
    """
    if len(Nc1) != len(Nc2):  # make sure there are the same number of channels
        msg = 'Number of Channels not equal, cannot perform correlation'
        detex.log(__name__, msg, level='error')
    Nc = len(Nc1)  # Number of channels
    if len(mptd1) != len(mptd2) or len(mpfd2) != len(mpfd1):
        msg = 'Lengths not equal on multiplexed data, cannot correlate'
        detex.log(__name__, msg, level='error')
    n = len(mptd1)

    trunc = n // (2 * Nc) - 1  # truncate value
    # trunc = n - 1
    # n = trunc + 1

    mptd2Temp = mptd2.copy()
    mptd2Temp = np.lib.pad(mptd2Temp, (n - 1, n - 1),
                           str('constant'),
                           constant_values=(0, 0))
    a = pd.rolling_mean(mptd2Temp, n)[n - 1:]
    b = pd.rolling_std(mptd2Temp, n)[n - 1:]
    b *= np.sqrt((n - 1.0) / n)
    c = np.real(scipy.fftpack.ifft(np.multiply(np.conj(mpfd1), mpfd2)))
    c1 = np.concatenate([c[-(n - 1):], c[:n]])  # swap end to start
    # slice by # of channels as not to mix match chans in multplexed stream
    result = ((c1 - mptd1.sum() * a) / (n * b * np.std(mptd1)))[Nc - 1::Nc]
    result = result[trunc:-trunc]
    try:
        maxcc = np.nanmax(result)
        mincc = np.nanmin(result)
        maxind = np.nanargmax(result)
        if maxcc > 1. or mincc < -1.:  # if an inf is found in the array
            # this can happen if some of the waveforms have been zeroed out
            result[(result > 1) | (result < -1)] = 0
            maxcc = np.nanmax(result)
            maxind = np.nanargmax(result)
    except ValueError:  # if fails skip
        return 0.0, 0.0, 0.0
    subsamp = _subSamp(result, maxind)
    return maxcc, (maxind + 1 + trunc) * Nc - (n), subsamp
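A short check (not detex code) of the identity _CCX2 leans on: cross-correlation computed with FFTs of zero-padded signals matches the direct computation, after reordering so the lags run from -(n-1) to n-1.

import numpy as np
import scipy.fftpack

a = np.random.RandomState(0).randn(64)
b = np.random.RandomState(1).randn(64)
n = len(a)
nfft = 2 * n - 1                               # pad so circular == linear
fa = scipy.fftpack.fft(a, nfft)
fb = scipy.fftpack.fft(b, nfft)
xc = np.real(scipy.fftpack.ifft(np.conj(fa) * fb))
xc = np.concatenate([xc[-(n - 1):], xc[:n]])   # reorder to lags -(n-1)..(n-1)
assert np.allclose(xc, np.correlate(b, a, mode='full'))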
Example #49
def _verifyEvents(Dets, Autos, veriFile, veriBuffer, includeAllVeriColumns):
    if not veriFile or not os.path.exists(veriFile):
        msg = 'No veriFile passed or it does not exist, skipping verification'
        detex.log(__name__, msg, pri=True)
        return
    else:
        vertem = _readVeriFile(veriFile)
        vertem['STMP'] = [obspy.core.UTCDateTime(x) for x in vertem['TIME']]
        verlist = []
        additionalColumns = list(set(vertem.columns) -
                                 set(['TIME', 'LAT', 'LON', 'MAG', 'ProEnMag',
                                      'DEPTH', 'NAME']))
        for vernum, verrow in vertem.iterrows():
            temDets = Dets[(Dets.MSTAMPmin - veriBuffer / 2. < verrow.STMP) &
                           (Dets.MSTAMPmax + veriBuffer / 2. > verrow.STMP) &
                           ([not x for x in Dets.Verified])]
            if len(temDets) > 0:  # todo handle multiple verifications
                trudet = temDets[temDets.DSav == temDets.DSav.max()]
                Dets.loc[trudet.index[0], 'Verified'] = True
                if includeAllVeriColumns:
                    for col in additionalColumns:
                        if col not in trudet.columns:
                            trudet[col] = verrow[col]
                trudet['VerMag'] = verrow.MAG
                trudet['VerLat'] = verrow.LAT
                trudet['VerLon'] = verrow.LON
                trudet['VerDepth'] = verrow.DEPTH
                trudet['VerName'] = verrow.NAME
                verlist.append(trudet)
            else:
                temAutos = Autos[
                    (Autos.MSTAMPmin - veriBuffer / 2. < verrow.STMP) &
                    (Autos.MSTAMPmax + veriBuffer / 2. > verrow.STMP) &
                    ([not x for x in Autos.Verified])]
                if len(temAutos) > 0:  # todo handle multiple verifications
                    trudet = temAutos[temAutos.DSav == temAutos.DSav.max()]
                    Autos.loc[trudet.index[0], 'Verified'] = True
                    if includeAllVeriColumns:
                        for col in additionalColumns:
                            if col not in trudet.columns:
                                trudet[col] = verrow[col]
                    trudet['VerMag'] = verrow.MAG
                    trudet['VerLat'] = verrow.LAT
                    trudet['VerLon'] = verrow.LON
                    trudet['VerDepth'] = verrow.DEPTH
                    trudet['VerName'] = verrow.NAME
                    verlist.append(trudet)
        if len(verlist) > 0:
            verifs = pd.concat(verlist, ignore_index=True)
            # sort and drop duplicates so each verified event is kept only once
            verifs.sort_values(by=['Event', 'DSav'], inplace=True)
            verifs.drop_duplicates(subset='Event', keep='last', inplace=True)
            verifs.drop('Verified', axis=1, inplace=True)
        else:
            verifs = pd.DataFrame()
        return verifs
Example #50
def _CCX2(mpfd1, mpfd2, mptd1, mptd2, Nc1, Nc2):
    """
    Function to find the max correlation coefficient and the corresponding lag
    time between 2 traces. The ffts should already have been calculated
    """
    if len(Nc1) != len(Nc2):  # make sure there are the same number of channels
        msg = 'Number of Channels not equal, cannot perform correlation'
        detex.log(__name__, msg, level='error')
    Nc = len(Nc1)  # Number of channels
    if len(mptd1) != len(mptd2) or len(mpfd2) != len(mpfd1):
        msg = 'Lengths not equal on multiplexed data, cannot correlate'
        detex.log(__name__, msg, level='error')
    n = len(mptd1)

    trunc = n // (2 * Nc) - 1  # truncate value
    # trunc = n - 1
    # n = trunc + 1

    mptd2Temp = mptd2.copy()
    mptd2Temp = np.lib.pad(mptd2Temp, (n - 1, n - 1), str('constant'),
                           constant_values=(0, 0))
    a = pd.rolling_mean(mptd2Temp, n)[n - 1:]
    b = pd.rolling_std(mptd2Temp, n)[n - 1:]
    b *= np.sqrt((n - 1.0) / n)
    c = np.real(scipy.fftpack.ifft(np.multiply(np.conj(mpfd1), mpfd2)))
    c1 = np.concatenate([c[-(n - 1):], c[:n]])  # swap end to start
    # slice by # of channels as not to mix match chans in multplexed stream
    result = ((c1 - mptd1.sum() * a) / (n * b * np.std(mptd1)))[Nc - 1::Nc]
    result = result[trunc: -trunc]
    try:
        maxcc = np.nanmax(result)
        mincc = np.nanmin(result)
        maxind = np.nanargmax(result)
        if maxcc > 1. or mincc < -1.:  # if an inf is found in the array
            # this can happen if some of the waveforms have been zeroed out
            result[(result > 1) | (result < -1)] = 0
            maxcc = np.nanmax(result)
            maxind = np.nanargmax(result)
    except ValueError:  # if fails skip
        return 0.0, 0.0, 0.0
    subsamp = _subSamp(result, maxind)
    return maxcc, (maxind + 1 + trunc) * Nc - (n), subsamp
Example #51
def _testStreamLengths(TRDF, row, ind):
    lens = np.array([len(x) for x in row.MPtd.values()])
    # trim to smallest length if within 90% of median, else kill key
    le = np.min(lens[lens > np.median(lens) * .9])

    keysToKill = [x for x in row.Events if len(row.MPtd[x]) < le]
    # trim events slightly too small if any
    for key in row.Events:
        trimed = row.MPtd[key][:le]
        TRDF.loc[ind, 'MPtd'][key] = trimed
    # reset keys on TRDF
    tmar = np.array(TRDF.Events[ind])
    tk = [not x in keysToKill for x in TRDF.Events[ind]]
    TRDF.Events[ind] = tmar[np.array(tk)]
    for key in keysToKill:
        msg = ('%s on %s is out of length tolerance, removing' %
               (key, row.Station))
        detex.log(__name__, msg, level='warn', pri=True)
        TRDF.MPtd[ind].pop(key, None)
    return TRDF
Example #52
def _applyFilter(st, filt, decimate=False, dtype='double', fillZeros=False):
    """
    Apply a filter, decimate, and trim to even start/end times 
    """
    if st is None or len(st) < 1:
        msg = '_applyFilter got a stream with 0 length'
        detex.log(__name__, msg, level='warn')
        return obspy.Stream()
    st.sort()
    st1 = st.copy()
    if dtype == 'single':  # cast into single
        for num, tr in enumerate(st):
            st[num].data = tr.data.astype(np.float32)
    nc = list(set([x.stats.channel for x in st]))
    if len(st) > len(nc):  # if data is fragmented only keep largest chunk
        if fillZeros:
            st = _mergeChannelsFill(st)
        else:
            st = _mergeChannels(st)
    if not len(st) == len(nc) or len(st) < 1:
        sta = st1[0].stats.station
        stime = str(st1[0].stats.starttime)
        msg = 'Stream is too fractured around %s on %s' % (str(stime), sta)
        detex.log(__name__, msg, level='warn')
        return obspy.Stream()
        # st1.write('failed_merge-%s-%s.pkl'%(sta, stime), 'pickle')
        # assert len(st) == len(nc)
    if decimate:
        st.decimate(decimate)

    startTrim = max([x.stats.starttime for x in st])
    endTrim = min([x.stats.endtime for x in st])
    if startTrim > endTrim:  # return an empty stream if chans don't overlap
        return obspy.Stream()
    st.trim(starttime=startTrim, endtime=endTrim)
    st = st.split()
    st.detrend('linear')
    if isinstance(filt, list) or isinstance(filt, tuple):
        st.filter('bandpass', freqmin=filt[0], freqmax=filt[1],
                  corners=filt[2], zerophase=filt[3])
    return st