Example #1
def process(tr,
            lowcut,
            highcut,
            filt_order,
            samp_rate,
            debug,
            starttime=False,
            clip=False,
            length=86400,
            seisan_chan_names=False,
            ignore_length=False,
            fill_gaps=True):
    """
    Basic function to process data, usually called by dayproc or shortproc.

    Functionally, this will bandpass, downsample and check headers and length
    of trace to ensure files start when they should and are the correct length.
    This is a simple wrapper on obspy functions; we include it here to provide
    a system to ensure all parts of the dataset are processed in the same way.

    .. note:: Usually this function is called via dayproc or shortproc.

    :type tr: obspy.core.trace.Trace
    :param tr: Trace to process
    :type lowcut: float
    :param lowcut: Low cut in Hz, if set to None and highcut is set, will use \
        a lowpass filter.
    :type highcut: float
    :param highcut: High cut in Hz, if set to None and lowcut is set, will \
        use a highpass filter.
    :type filt_order: int
    :param filt_order: Number of corners for filter.
    :type samp_rate: float
    :param samp_rate: Desired sampling rate in Hz.
    :type debug: int
    :param debug: Debug output level from 0-5, higher numbers = more output.
    :type starttime: obspy.core.utcdatetime.UTCDateTime
    :param starttime: Desired start of trace
    :type clip: bool
    :param clip: Whether to expect, and enforce, a set length of data.
    :type length: float
    :param length: Use to set a fixed length for data from the given starttime.
    :type seisan_chan_names: bool
    :param seisan_chan_names:
        Whether channels are named like seisan channels (which are two letters
        rather than SEED convention of three) - defaults to False.
    :type ignore_length: bool
    :param ignore_length: See warning in dayproc.
    :type fill_gaps: bool
    :param fill_gaps: Whether to pad any gaps found with zeros or not.

    :return: Processed trace.
    :rtype: :class:`obspy.core.trace.Trace`
    """
    # Add sanity check
    if highcut and highcut >= 0.5 * samp_rate:
        raise IOError('Highcut must be lower than the nyquist')

    # Define the start-time
    if starttime:
        # Be nice and allow a datetime object.
        if isinstance(starttime, dt.date) or isinstance(
                starttime, dt.datetime):
            starttime = UTCDateTime(starttime)
        day = starttime.date
    else:
        day = tr.stats.starttime.date

    debug_print('Working on: ' + tr.stats.station + '.' + tr.stats.channel, 2,
                debug)
    if debug >= 5:
        tr.plot()
    # Check if the trace is gappy and pad if it is.
    gappy = False
    if isinstance(tr.data, np.ma.MaskedArray):
        gappy = True
        gaps, tr = _fill_gaps(tr)
    # Do a brute force quality check
    qual = _check_daylong(tr)
    if not qual:
        msg = ("Data have more zeros than actual data, please check the raw",
               " data set-up and manually sort it: " + tr.stats.station + "." +
               tr.stats.channel)
        raise ValueError(msg)
    tr = tr.detrend('simple')
    # Detrend data before filtering
    debug_print(
        'I have ' + str(len(tr.data)) + ' data points for ' +
        tr.stats.station + '.' + tr.stats.channel + ' before processing', 0,
        debug)

    # Sanity check to ensure files are daylong
    padded = False
    if float(tr.stats.npts / tr.stats.sampling_rate) != length and clip:
        debug_print(
            'Data for ' + tr.stats.station + '.' + tr.stats.channel +
            ' are not of daylong length, will zero pad', 2, debug)
        if tr.stats.endtime - tr.stats.starttime < 0.8 * length\
           and not ignore_length:
            raise NotImplementedError(
                "Data for {0}.{1} is {2} hours long, which is less than 80 "
                "percent of the desired length, will not pad".format(
                    tr.stats.station, tr.stats.channel,
                    (tr.stats.endtime - tr.stats.starttime) / 3600))
        # trim, then calculate length of any pads required
        tr = tr.trim(starttime, starttime + length, nearest_sample=True)
        pre_pad_secs = tr.stats.starttime - starttime
        post_pad_secs = (starttime + length) - tr.stats.endtime
        if pre_pad_secs > 0 or post_pad_secs > 0:
            padded = True
            pre_pad = np.zeros(int(pre_pad_secs * tr.stats.sampling_rate))
            post_pad = np.zeros(int(post_pad_secs * tr.stats.sampling_rate))
            debug_print(str(tr), 2, debug)
            debug_print(
                "Padding to day long with %f s before and %f s at end" %
                (pre_pad_secs, post_pad_secs), 1, debug)
            tr.data = np.concatenate([pre_pad, tr.data, post_pad])
            # Use this rather than the expected pad because of rounding samples
            tr.stats.starttime -= len(pre_pad) * tr.stats.delta
            debug_print(str(tr), 2, debug)
        # If there is one sample too many after this remove the first one
        # by convention
        if len(tr.data) == (length * tr.stats.sampling_rate) + 1:
            tr.data = tr.data[1:len(tr.data)]
        if not tr.stats.sampling_rate * length == tr.stats.npts:
            raise ValueError('Data are not daylong for ' + tr.stats.station +
                             '.' + tr.stats.channel)
        debug_print(
            'I now have %i data points after enforcing length' % len(tr.data),
            0, debug)
    # Check sampling rate and resample
    if tr.stats.sampling_rate != samp_rate:
        debug_print('Resampling', 1, debug)
        tr.resample(samp_rate)
    # Filtering section
    tr = tr.detrend('simple')  # Detrend data again before filtering
    if highcut and lowcut:
        debug_print('Bandpassing', 1, debug)
        tr.data = bandpass(tr.data, lowcut, highcut, tr.stats.sampling_rate,
                           filt_order, True)
    elif highcut:
        debug_print('Lowpassing', 1, debug)
        tr.data = lowpass(tr.data, highcut, tr.stats.sampling_rate, filt_order,
                          True)
    elif lowcut:
        debug_print('Highpassing', 1, debug)
        tr.data = highpass(tr.data, lowcut, tr.stats.sampling_rate, filt_order,
                           True)
    else:
        debug_print('No filters applied', 2, debug)
    # Account for two letter channel names in s-files and therefore templates
    if seisan_chan_names:
        tr.stats.channel = tr.stats.channel[0] + tr.stats.channel[-1]

    # Sanity check the time header
    if tr.stats.starttime.day != day and clip:
        debug_print(
            "Time headers do not match expected date: {0}".format(
                tr.stats.starttime), 2, debug)

    if padded:
        debug_print("Reapplying zero pads post processing", 1, debug)
        debug_print(str(tr), 2, debug)
        pre_pad = np.zeros(int(pre_pad_secs * tr.stats.sampling_rate))
        post_pad = np.zeros(int(post_pad_secs * tr.stats.sampling_rate))
        pre_pad_len = len(pre_pad)
        post_pad_len = len(post_pad)
        debug_print(
            "Taking only valid data between %i and %i samples" %
            (pre_pad_len, len(tr.data) - post_pad_len), 1, debug)
        # Re-apply the pads, taking only the data section that was valid
        tr.data = np.concatenate([
            pre_pad, tr.data[pre_pad_len:len(tr.data) - post_pad_len], post_pad
        ])
        debug_print(str(tr), 2, debug)
    # Sanity check to ensure files are daylong
    if float(tr.stats.npts / tr.stats.sampling_rate) != length and clip:
        debug_print(
            'Data for ' + tr.stats.station + '.' + tr.stats.channel +
            ' are not of daylong length, will zero pad', 1, debug)
        # Use obspy's trim function with zero padding
        tr = tr.trim(starttime,
                     starttime + length,
                     pad=True,
                     fill_value=0,
                     nearest_sample=True)
        # If there is one sample too many after this remove the last one
        # by convention
        if len(tr.data) == (length * tr.stats.sampling_rate) + 1:
            tr.data = tr.data[1:len(tr.data)]
        if not tr.stats.sampling_rate * length == tr.stats.npts:
            raise ValueError('Data are not daylong for ' + tr.stats.station +
                             '.' + tr.stats.channel)
    # Replace the gaps with zeros
    if gappy:
        tr = _zero_pad_gaps(tr, gaps, fill_gaps=fill_gaps)
    # Final visual check for debug
    if debug > 4:
        tr.plot()
    return tr
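
A minimal usage sketch for process on a single day-long trace. This is a hedged illustration: the file path and day are hypothetical, and it assumes process is exposed by eqcorrscan.utils.pre_processing (as implied by the note that it is usually called via dayproc or shortproc).

from obspy import read, UTCDateTime
from eqcorrscan.utils.pre_processing import process

# Hypothetical day-long, single-trace miniSEED file.
tr = read('/path/to/day_long.mseed')[0]
# Bandpass 2-9 Hz, resample to 20 Hz and enforce a day-long (86400 s) trace
# starting at the (hypothetical) day start.
tr = process(tr=tr, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=20.0,
             debug=0, starttime=UTCDateTime(2013, 9, 1), clip=True,
             length=86400)
print(tr)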
Example #2
def shortproc(st,
              lowcut,
              highcut,
              filt_order,
              samp_rate,
              debug=0,
              parallel=False,
              num_cores=False,
              starttime=None,
              endtime=None,
              seisan_chan_names=False,
              fill_gaps=True):
    """
    Basic function to bandpass and downsample.

    Works in place on data.  This is employed to ensure all parts of the
    data are processed in the same way.

    :type st: obspy.core.stream.Stream
    :param st: Stream to process
    :type lowcut: float
    :param lowcut: Low cut for bandpass in Hz
    :type highcut: float
    :param highcut: High cut for bandpass in Hz
    :type filt_order: int
    :param filt_order: Number of corners for bandpass filter
    :type samp_rate: float
    :param samp_rate: Sampling rate desired in Hz
    :type debug: int
    :param debug: Debug flag from 0-5, higher numbers = more output
    :type parallel: bool
    :param parallel: Set to True to process traces in parallel, for small \
        numbers of traces this is often slower than serial processing, \
        defaults to False
    :type num_cores: int
    :param num_cores: Control the number of cores for parallel processing, \
        if set to False then this will use all the cores.
    :type starttime: obspy.core.utcdatetime.UTCDateTime
    :param starttime:
        Desired data start time, will trim to this before processing
    :type endtime: obspy.core.utcdatetime.UTCDateTime
    :param endtime:
        Desired data end time, will trim to this before processing
    :type seisan_chan_names: bool
    :param seisan_chan_names:
        Whether channels are named like seisan channels (which are two letters
        rather than SEED convention of three) - defaults to False.
    :type fill_gaps: bool
    :param fill_gaps: Whether to pad any gaps found with zeros or not.

    :return: Processed stream
    :rtype: :class:`obspy.core.stream.Stream`

    .. note::
        Will convert channel names to two characters long if
        `seisan_chan_names=True`.

    .. warning::
        If you intend to use this for processing templates you should consider
        how resampling will impact your cross-correlations. Minor differences
        in resampling between day-long files (which you are likely to use for
        continuous detection) and shorter files will reduce your
        cross-correlations!

    .. rubric:: Example, bandpass

    >>> from obspy import read
    >>> from eqcorrscan.utils.pre_processing import shortproc
    >>> st = read('eqcorrscan/tests/test_data/WAV/TEST_/' +
    ...           '2013-09-01-0410-35.DFDPC_024_00')
    >>> st = shortproc(st=st, lowcut=2, highcut=9, filt_order=3, samp_rate=20,
    ...                debug=0, parallel=True, num_cores=2)
    >>> print(st[0])
    AF.LABE..SHZ | 2013-09-01T04:10:35.700000Z - 2013-09-01T04:12:05.650000Z \
| 20.0 Hz, 1800 samples

    .. rubric:: Example, low-pass

    >>> from obspy import read
    >>> from eqcorrscan.utils.pre_processing import shortproc
    >>> st = read('eqcorrscan/tests/test_data/WAV/TEST_/' +
    ...           '2013-09-01-0410-35.DFDPC_024_00')
    >>> st = shortproc(st=st, lowcut=None, highcut=9, filt_order=3,
    ...                samp_rate=20, debug=0)
    >>> print(st[0])
    AF.LABE..SHZ | 2013-09-01T04:10:35.700000Z - 2013-09-01T04:12:05.650000Z \
| 20.0 Hz, 1800 samples

    .. rubric:: Example, high-pass

    >>> from obspy import read
    >>> from eqcorrscan.utils.pre_processing import shortproc
    >>> st = read('eqcorrscan/tests/test_data/WAV/TEST_/' +
    ...           '2013-09-01-0410-35.DFDPC_024_00')
    >>> st = shortproc(st=st, lowcut=2, highcut=None, filt_order=3,
    ...                samp_rate=20, debug=0)
    >>> print(st[0])
    AF.LABE..SHZ | 2013-09-01T04:10:35.700000Z - 2013-09-01T04:12:05.650000Z \
| 20.0 Hz, 1800 samples
    """
    if isinstance(st, Trace):
        tracein = True
        st = Stream(st)
    else:
        tracein = False
    # Add sanity check for filter
    if highcut and highcut >= 0.5 * samp_rate:
        raise IOError('Highcut must be lower than the nyquist')
    if debug > 4:
        parallel = False
    if starttime is not None and endtime is not None:
        for tr in st:
            tr.trim(starttime, endtime)
            if len(tr.data) == (
                (endtime - starttime) * tr.stats.sampling_rate) + 1:
                tr.data = tr.data[1:len(tr.data)]
    elif starttime:
        for tr in st:
            tr.trim(starttime=starttime)
    elif endtime:
        for tr in st:
            tr.trim(endtime=endtime)
    for tr in list(st):  # iterate over a static list so removal is safe
        if len(tr.data) == 0:
            st.remove(tr)
            debug_print(
                'No data for %s.%s after trim' %
                (tr.stats.station, tr.stats.channel), 1, debug)
    if parallel:
        if not num_cores:
            num_cores = cpu_count()
        if num_cores > len(st):
            num_cores = len(st)
        pool = Pool(processes=num_cores)
        results = [
            pool.apply_async(
                process, (tr, ), {
                    'lowcut': lowcut,
                    'highcut': highcut,
                    'filt_order': filt_order,
                    'samp_rate': samp_rate,
                    'debug': debug,
                    'starttime': False,
                    'clip': False,
                    'seisan_chan_names': seisan_chan_names,
                    'fill_gaps': fill_gaps
                }) for tr in st
        ]
        pool.close()
        stream_list = [p.get() for p in results]
        pool.join()
        st = Stream(stream_list)
    else:
        for i, tr in enumerate(st):
            st[i] = process(tr=tr,
                            lowcut=lowcut,
                            highcut=highcut,
                            filt_order=filt_order,
                            samp_rate=samp_rate,
                            debug=debug,
                            starttime=False,
                            clip=False,
                            seisan_chan_names=seisan_chan_names,
                            fill_gaps=fill_gaps)
    if tracein:
        st.merge()
        return st[0]
    return st
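
Beyond the Stream examples in the docstring, shortproc also accepts a single Trace and returns a Trace (the tracein branch above). A minimal sketch reusing the same test file as the docstring examples:

from obspy import read
from eqcorrscan.utils.pre_processing import shortproc

tr = read('eqcorrscan/tests/test_data/WAV/TEST_/' +
          '2013-09-01-0410-35.DFDPC_024_00')[0]
# Passing a Trace in returns a processed Trace rather than a Stream.
tr = shortproc(st=tr, lowcut=2, highcut=9, filt_order=3, samp_rate=20)
print(tr.stats.sampling_rate)  # 20.0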
Example #3
def brightness(stations,
               nodes,
               lags,
               stream,
               threshold,
               thresh_type,
               template_length,
               template_saveloc,
               coherence_thresh,
               coherence_stations=['all'],
               coherence_clip=False,
               gap=2.0,
               clip_level=100,
               instance=0,
               pre_pick=0.2,
               plotvar=False,
               plotsave=True,
               cores=1,
               debug=0,
               mem_issue=False):
    """
    Calculate the brightness function for a single day.

    Written to calculate the brightness function for a single day of data,
    using moveouts from a 3D travel-time grid.

    .. Note::
        Data in stream must be all of the same length and have the same
        sampling rates, see :func:`eqcorrscan.utils.pre_processing.dayproc`

    :type stations: list
    :param stations:
        List of station names in the form where stations[i] refers to
        nodes[i][:] and lags[i][:]
    :type nodes: list
    :param nodes:
        List of node points, where each node is a (latitude, longitude, depth)
        tuple with latitude and longitude in decimal degrees and depth in km.
    :type lags: numpy.ndarray
    :param lags:
        Array of arrays where lags[i][:] refers to stations[i]. lags[i][j]
        should be the delay to the nodes[i][j] for stations[i] in seconds.
    :type stream: obspy.core.stream.Stream
    :param stream: Data through which to look for detections.
    :type threshold: float
    :param threshold:
        Threshold value for detection of template within the brightness
        function.
    :type thresh_type: str
    :param thresh_type:
        Either MAD or abs where MAD is the Median Absolute Deviation and abs
        is an absolute brightness.
    :type template_length: float
    :param template_length: Length of template to extract in seconds
    :type template_saveloc: str
    :param template_saveloc: Path of where to save the templates.
    :type coherence_thresh: tuple
    :param coherence_thresh:
        Threshold for removing incoherent peaks in the network response;
        those below this will not be used as templates. Must be in the form
        (a, b), where the coherence threshold is given by :math:`a - kchan/b`,
        where kchan is the number of channels used to compute the coherence.
    :type coherence_stations: list
    :param coherence_stations:
        List of stations to use in the coherence thresholding - defaults to
        `all` which uses all the stations.
    :type coherence_clip: tuple
    :param coherence_clip:
        Start and end in seconds of data to window around, defaults to False,
        which uses all the data given.
    :type gap: float
    :param gap: Minimum inter-event time in seconds for detections.
    :type clip_level: float
    :param clip_level:
        Multiplier applied to the mean deviation of the energy as an upper
        limit, used to remove spikes (earthquakes, lightning, electrical
        spikes) from the energy stack.
    :type instance: int
    :param instance:
        Optional, used for tracking when using a distributed computing system.
    :type pre_pick: float
    :param pre_pick: Seconds before the detection time to include in template
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotsave: bool
    :param plotsave:
        Save or show plots. If `False`, plots will be shown on screen; as this
        function is designed for bulk use the default is `True`, which saves
        any plots rather than showing them. Note that `True` switches the
        matplotlib backend to 'Agg', so no plots will be displayed.
    :type cores: int
    :param cores: Number of cores to use, defaults to 1.
    :type debug: int
    :param debug: Debug level from 0-5, higher is more output.
    :type mem_issue: bool
    :param mem_issue:
        Set to True to write temporary variables to disk rather than store in
        memory, slow.

    :return:
        List of templates as :class:`obspy.core.stream.Stream` objects and the
        list of grid nodes that generated them.
    :rtype: tuple
    """
    if plotsave:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        plt.ioff()
    from eqcorrscan.utils import plotting
    from eqcorrscan.utils.debug_log import debug_print
    # Check that we actually have the correct stations
    realstations = []
    for station in stations:
        st = stream.select(station=station)
        if st:
            realstations.append(station)
    del st
    stream_copy = stream.copy()
    # Force convert to int16
    for tr in stream_copy:
        # int16 max range is +/- 32767
        if max(abs(tr.data)) > 32767:
            tr.data = 32767 * (tr.data / max(abs(tr.data)))
            # Make sure that the data aren't clipped; if they are high gain,
            # scale the data
        tr.data = tr.data.astype(np.int16)
    # The internal _node_loop converts energy to int16 too to conserve memory,
    # to do this it forces the maximum of a single energy trace to be 500 and
    # normalises to this level - this only works for fewer than 65 channels of
    # data
    if len(stream_copy) > 130:
        raise BrightnessError(
            'Too many streams: either re-code to cope with more memory usage '
            'or less precision, or reduce the data volume')
    # Loop through each node in the input
    # Linear run
    print('Computing the energy stacks')
    # Parallel run
    num_cores = cores
    if num_cores > len(nodes):
        num_cores = len(nodes)
    if num_cores > cpu_count():
        num_cores = cpu_count()
    if mem_issue and not os.path.isdir('tmp' + str(instance)):
        os.makedirs('tmp' + str(instance))
    pool = Pool(processes=num_cores)
    results = [
        pool.apply_async(
            _node_loop, (stations, ), {
                'lags': lags[:, i],
                'stream': stream,
                'i': i,
                'clip_level': clip_level,
                'mem_issue': mem_issue,
                'instance': instance
            }) for i in range(len(nodes))
    ]
    pool.close()
    if not mem_issue:
        print('Computing the cumulative network response from memory')
        energy = [p.get() for p in results]
        pool.join()
        energy.sort(key=lambda tup: tup[0])
        energy = [node[1] for node in energy]
        energy = np.concatenate(energy, axis=0)
        print(energy.shape)
    else:
        pool.join()
        del results
    # Now compute the cumulative network response and then detect possible
    # events
    if not mem_issue:
        print(energy.shape)
        indices = np.argmax(energy, axis=0)  # Indices of maximum energy
        print(indices.shape)
        cum_net_resp = np.array([np.nan] * len(indices))
        cum_net_resp[0] = energy[indices[0]][0]
        peak_nodes = [nodes[indices[0]]]
        for i in range(1, len(indices)):
            cum_net_resp[i] = energy[indices[i]][i]
            peak_nodes.append(nodes[indices[i]])
        del energy, indices
    else:
        print('Reading the temp files and computing network response')
        node_splits = int(len(nodes) // num_cores)
        print(node_splits)
        indices = []
        for i in range(num_cores):
            indices.append(
                list(np.arange(node_splits * i, node_splits * (i + 1))))
        indices[-1] += list(np.arange(node_splits * (i + 1), len(nodes)))
        # results = [_cum_net_resp(node_lis=indices[i], instance=instance)
        #            for i in range(num_cores)]
        pool = Pool(processes=num_cores)
        results = [
            pool.apply_async(_cum_net_resp, args=(indices[i], instance))
            for i in range(num_cores)
        ]
        pool.close()
        results = [p.get() for p in results]
        pool.join()
        responses = [result[0] for result in results]
        print(np.shape(responses))
        node_indices = [result[1] for result in results]
        cum_net_resp = np.array(responses)
        indices = np.argmax(cum_net_resp, axis=0)
        print(indices.shape)
        print(cum_net_resp.shape)
        cum_net_resp = np.array(
            [cum_net_resp[indices[i]][i] for i in range(len(indices))])
        peak_nodes = [
            nodes[node_indices[indices[i]][i]] for i in range(len(indices))
        ]
        del indices, node_indices
    if plotvar:
        cum_net_trace = Stream(
            Trace(data=cum_net_resp,
                  header=Stats({
                      'station': 'NR',
                      'channel': '',
                      'network': 'Z',
                      'location': '',
                      'starttime': stream[0].stats.starttime,
                      'sampling_rate': stream[0].stats.sampling_rate
                  })))
        cum_net_trace += stream.select(channel='*N')
        cum_net_trace += stream.select(channel='*1')
        cum_net_trace.sort(['network', 'station', 'channel'])

    # Find detection within this network response
    print('Finding detections in the cumulative network response')
    detections = _find_detections(cum_net_resp, peak_nodes, threshold,
                                  thresh_type, stream[0].stats.sampling_rate,
                                  realstations, gap)
    del cum_net_resp
    templates = []
    nodesout = []
    good_detections = []
    if detections:
        print('Converting detections into templates')
        # Generate a catalog of detections
        # detections_cat = Catalog()
        for j, detection in enumerate(detections):
            debug_print(
                'Converting for detection %i of %i' % (j, len(detections)), 3,
                debug)
            # Create an event for each detection
            event = Event()
            # Set up some header info for the event
            event.event_descriptions.append(EventDescription())
            event.event_descriptions[0].text = 'Brightness detection'
            event.creation_info = CreationInfo(agency_id='EQcorrscan')
            copy_of_stream = deepcopy(stream_copy)
            # Convert detections to obspy.core.event type -
            # name of detection template is the node.
            node = (detection.template_name.split('_')[0],
                    detection.template_name.split('_')[1],
                    detection.template_name.split('_')[2])
            # Look up node in nodes and find the associated lags
            index = nodes.index(
                (float(node[0]), float(node[1]), float(node[2])))
            detect_lags = lags[:, index]
            ksta = Comment(text='Number of stations=' + str(len(detect_lags)))
            event.origins.append(Origin())
            event.origins[0].comments.append(ksta)
            event.origins[0].time = copy_of_stream[0].stats.starttime +\
                detect_lags[0] + detection.detect_time
            event.origins[0].latitude = float(node[0])
            event.origins[0].longitude = float(node[1])
            event.origins[0].depth = float(node[2])
            for i, detect_lag in enumerate(detect_lags):
                station = stations[i]
                st = copy_of_stream.select(station=station)
                if len(st) != 0:
                    for tr in st:
                        _waveform_id = WaveformStreamID(
                            station_code=tr.stats.station,
                            channel_code=tr.stats.channel,
                            network_code=tr.stats.network)
                        event.picks.append(
                            Pick(waveform_id=_waveform_id,
                                 time=tr.stats.starttime + detect_lag +
                                 detection.detect_time + pre_pick,
                                 onset='emergent',
                                 evaluation_mode='automatic'))
            debug_print('Generating template for detection: %i' % j, 0, debug)
            template = template_gen(picks=event.picks,
                                    st=copy_of_stream,
                                    length=template_length,
                                    swin='all')
            template_name = template_saveloc + '/' +\
                str(template[0].stats.starttime) + '.ms'
            # In the interests of RAM conservation we write then read
            # Check coherency here!
            temp_coher, kchan = coherence(template, coherence_stations,
                                          coherence_clip)
            coh_thresh = float(coherence_thresh[0]) - kchan / \
                float(coherence_thresh[1])
            if temp_coher > coh_thresh:
                template.write(template_name, format="MSEED")
                print('Written template as: ' + template_name)
                print('---------------------------------coherence LEVEL: ' +
                      str(temp_coher))
                coherent = True
            else:
                debug_print(
                    'Template was incoherent, coherence level: ' +
                    str(temp_coher), 0, debug)
                coherent = False
            del copy_of_stream, tr, template
            if coherent:
                templates.append(obsread(template_name))
                nodesout += [node]
                good_detections.append(detection)
            else:
                debug_print('No template for you', 0, debug)
            # detections_cat += event
    if plotvar:
        good_detections = [(cum_net_trace[-1].stats.starttime +
                            detection.detect_time).datetime
                           for detection in good_detections]
        if not plotsave:
            plotting.NR_plot(cum_net_trace[0:-1],
                             Stream(cum_net_trace[-1]),
                             detections=good_detections,
                             size=(18.5, 10),
                             title='Network response')
            # cum_net_trace.plot(size=(800,600), equal_scale=False)
        else:
            savefile = 'plots/' +\
                cum_net_trace[0].stats.starttime.datetime.strftime('%Y%m%d') +\
                '_NR_timeseries.pdf'
            plotting.NR_plot(cum_net_trace[0:-1],
                             Stream(cum_net_trace[-1]),
                             detections=good_detections,
                             size=(18.5, 10),
                             save=True,
                             savefile=savefile,
                             title='Network response')
    nodesout = list(set(nodesout))
    return templates, nodesout
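
A hedged sketch of how the stations, nodes and lags arguments fit together. All values and paths below are hypothetical: in practice the nodes and lags come from a 3D travel-time grid and the stream from dayproc, and the import assumes brightness lives in eqcorrscan.core.bright_lights.

import numpy as np
from obspy import read
from eqcorrscan.core.bright_lights import brightness

stations = ['STA1', 'STA2']                      # hypothetical station names
# One (latitude, longitude, depth-in-km) tuple per grid node.
nodes = [(-43.50, 170.30, 5.0), (-43.55, 170.35, 7.5)]
# lags[i][j] is the moveout in seconds at stations[i] for nodes[j].
lags = np.array([[1.2, 1.5],
                 [0.9, 1.1]])
stream = read('/path/to/processed_day.mseed')    # hypothetical dayproc output

templates, used_nodes = brightness(
    stations=stations, nodes=nodes, lags=lags, stream=stream,
    threshold=8.0, thresh_type='MAD', template_length=4.0,
    template_saveloc='templates', coherence_thresh=(0.5, 200.0))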
Example #4
def dayproc(st,
            lowcut,
            highcut,
            filt_order,
            samp_rate,
            starttime,
            debug=0,
            parallel=True,
            num_cores=False,
            ignore_length=False,
            seisan_chan_names=False,
            fill_gaps=True):
    """
    Wrapper around process to run day-long processing on multiple traces in a
    stream, optionally in parallel.

    Works in place on data.  This is employed to ensure all parts of the data \
    are processed in the same way.

    :type st: obspy.core.stream.Stream
    :param st: Stream to process (can be trace).
    :type lowcut: float
    :param lowcut: Low cut in Hz for bandpass.
    :type highcut: float
    :param highcut: High cut in Hz for bandpass.
    :type filt_order: int
    :param filt_order: Corners for bandpass.
    :type samp_rate: float
    :param samp_rate: Desired sampling rate in Hz.
    :type starttime: obspy.core.utcdatetime.UTCDateTime
    :param starttime: Desired start-date of trace.
    :type debug: int
    :param debug: Debug output level from 0-5, higher numbers = more output.
    :type parallel: bool
    :param parallel:
        Set to True to process traces in parallel, this is often faster than
        serial processing of traces: defaults to True.
    :type num_cores: int
    :param num_cores:
        Control the number of cores for parallel processing, if set to False
        then this will use all the cores.
    :type ignore_length: bool
    :param ignore_length: See warning below.
    :type seisan_chan_names: bool
    :param seisan_chan_names:
        Whether channels are named like seisan channels (which are two letters
        rather than SEED convention of three) - defaults to False.
    :type fill_gaps: bool
    :param fill_gaps: Whether to pad any gaps found with zeros or not.

    :return: Processed stream.
    :rtype: :class:`obspy.core.stream.Stream`

    .. note::
        Will convert channel names to two characters long if
        `seisan_chan_names=True`.

    .. warning::
        Will fail if data are less than 19.2 hours long - this number is
        arbitrary and is chosen to alert the user to the dangers of padding
        to day-long, if you don't care you can ignore this error by setting
        `ignore_length=True`. Use this option at your own risk!  It will also
        warn any-time it has to pad data - if you see strange artifacts in your
        detections, check whether the data have gaps.

    .. rubric:: Example

    >>> import obspy
    >>> if int(obspy.__version__.split('.')[0]) >= 1:
    ...     from obspy.clients.fdsn import Client
    ... else:
    ...     from obspy.fdsn import Client
    >>> from obspy import UTCDateTime
    >>> from eqcorrscan.utils.pre_processing import dayproc
    >>> client = Client('NCEDC')
    >>> t1 = UTCDateTime(2012, 3, 26)
    >>> t2 = t1 + 86400
    >>> bulk_info = [('BP', 'JCNB', '40', 'SP1', t1, t2)]
    >>> st = client.get_waveforms_bulk(bulk_info)
    >>> st_keep = st.copy()  # Copy the stream for later examples
    >>> # Example of bandpass filtering
    >>> st = dayproc(st=st, lowcut=2, highcut=9, filt_order=3, samp_rate=20,
    ...              starttime=t1, debug=0, parallel=True, num_cores=2)
    >>> print(st[0])
    BP.JCNB.40.SP1 | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.\
950000Z | 20.0 Hz, 1728000 samples
    >>> # Example of lowpass filtering
    >>> st = dayproc(st=st, lowcut=None, highcut=9, filt_order=3, samp_rate=20,
    ...              starttime=t1, debug=0, parallel=True, num_cores=2)
    >>> print(st[0])
    BP.JCNB.40.SP1 | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.\
950000Z | 20.0 Hz, 1728000 samples
    >>> # Example of highpass filtering
    >>> st = dayproc(st=st, lowcut=2, highcut=None, filt_order=3, samp_rate=20,
    ...              starttime=t1, debug=0, parallel=True, num_cores=2)
    >>> print(st[0])
    BP.JCNB.40.SP1 | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.\
950000Z | 20.0 Hz, 1728000 samples
    """
    # Add sanity check for filter
    if isinstance(st, Trace):
        st = Stream(st)
        tracein = True
    else:
        tracein = False
    if highcut and highcut >= 0.5 * samp_rate:
        raise IOError('Highcut must be lower than the nyquist')
    if debug > 4:
        parallel = False
    # Set the start-time to a day start - cope with traces that start just
    # before midnight
    if starttime is None:
        startdates = []
        for tr in st:
            if abs(tr.stats.starttime - (UTCDateTime(tr.stats.starttime.date) +
                                         86400)) < tr.stats.delta:
                # If the trace starts within 1 sample of the next day, use the
                # next day as the startdate
                startdates.append((tr.stats.starttime + 86400).date)
                debug_print(
                    '{0} starts within 1 sample of the next day, using this '
                    'time {1}'.format(tr.id,
                                      (tr.stats.starttime + 86400).date), 2,
                    debug)
            else:
                startdates.append(tr.stats.starttime.date)
        # Check that all traces start on the same date...
        if not len(set(startdates)) == 1:
            raise NotImplementedError('Traces start on different days')
        starttime = UTCDateTime(startdates[0])
    if parallel:
        if not num_cores:
            num_cores = cpu_count()
        if num_cores > len(st):
            num_cores = len(st)
        pool = Pool(processes=num_cores)
        results = [
            pool.apply_async(
                process, (tr, ), {
                    'lowcut': lowcut,
                    'highcut': highcut,
                    'filt_order': filt_order,
                    'samp_rate': samp_rate,
                    'debug': debug,
                    'starttime': starttime,
                    'clip': True,
                    'ignore_length': ignore_length,
                    'length': 86400,
                    'seisan_chan_names': seisan_chan_names,
                    'fill_gaps': fill_gaps
                }) for tr in st
        ]
        pool.close()
        stream_list = [p.get() for p in results]
        pool.join()
        st = Stream(stream_list)
    else:
        for i, tr in enumerate(st):
            st[i] = process(tr=tr,
                            lowcut=lowcut,
                            highcut=highcut,
                            filt_order=filt_order,
                            samp_rate=samp_rate,
                            debug=debug,
                            starttime=starttime,
                            clip=True,
                            length=86400,
                            ignore_length=ignore_length,
                            seisan_chan_names=seisan_chan_names,
                            fill_gaps=fill_gaps)
    for tr in list(st):  # iterate over a static list so removal is safe
        if len(tr.data) == 0:
            st.remove(tr)
    if tracein:
        st.merge()
        return st[0]
    return st
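
dayproc also accepts a single Trace and, if starttime is passed as None, infers the day start from the trace headers (see the start of the function body). A hedged sketch with a hypothetical file:

from obspy import read
from eqcorrscan.utils.pre_processing import dayproc

tr = read('/path/to/day_long.mseed')[0]   # hypothetical day-long trace
tr = dayproc(st=tr, lowcut=2, highcut=9, filt_order=3, samp_rate=20,
             starttime=None, parallel=False)
print(tr.stats.npts / tr.stats.sampling_rate)  # 86400.0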
Example #5
def find_peaks2_short(arr,
                      thresh,
                      trig_int,
                      debug=0,
                      starttime=False,
                      samp_rate=1.0,
                      full_peaks=False):
    """
    Determine peaks in an array of data above a certain threshold.

    Uses a mask to remove data below threshold and finds peaks in what is left.

    :type arr: numpy.ndarray
    :param arr: 1-D numpy array is required
    :type thresh: float
    :param thresh:
        The threshold below which values are considered noise; peaks will not
        be found below this value.
    :type trig_int: int
    :param trig_int:
        The minimum difference in samples between triggers; if multiple peaks
        occur within this window only the highest will be kept.
    :type debug: int
    :param debug: Optional, debug level 0-5
    :type starttime: obspy.core.utcdatetime.UTCDateTime
    :param starttime: Starttime for plotting, only used if debug > 2.
    :type samp_rate: float
    :param samp_rate: Sampling rate in Hz, only used for plotting if debug > 2.
    :type full_peaks: bool
    :param full_peaks:
        If True, will avoid the issue alluded to below by declustering within
        data sections above the threshold, rather than just taking the peak
        within that section. This takes more time; match_filter calls this
        function with full_peaks=True.

    :return: List of tuples of peak values and locations.
    :rtype: list


    >>> import numpy as np
    >>> arr = np.random.randn(100)
    >>> threshold = 10
    >>> arr[40] = 20
    >>> arr[60] = 100
    >>> find_peaks2_short(arr, threshold, 3)
    [(20.0, 40), (100.0, 60)]

    .. note::
        peak-finding is optimised for zero-mean cross-correlation data where
        fluctuations are frequent.  Because of this, in certain cases some
        peaks may be missed if the trig_int is short and the threshold is low.
        Consider the following case:

        >>> arr = np.array([1, .2, .2, .2, .2, 1, .2, .2, .2, .2, 1])
        >>> find_peaks2_short(arr, thresh=.2, trig_int=3)
        [(1.0, 0)]

        Whereas you would expect the following:

        >>> arr = np.array([1, .2, .2, .2, .2, 1, .2, .2, .2, .2, 1])
        >>> find_peaks2_short(arr, thresh=.2, trig_int=3, full_peaks=True)
        [(1.0, 0), (1.0, 5), (1.0, 10)]

        This is rare and unlikely to happen for correlation cases, where
        trigger intervals are usually large and thresholds high.

    """
    if not starttime:
        starttime = UTCDateTime(0)
    # Set everything below the threshold to zero
    image = np.copy(arr)
    image = np.abs(image)
    debug_print("Threshold: {0}\tMax: {1}".format(thresh, max(image)), 2,
                debug)
    image[image < thresh] = 0
    if len(image[image > thresh]) == 0:
        debug_print("No values over threshold {0}".format(thresh), 0, debug)
        return []
    debug_print(
        'Found {0} samples above the threshold'.format(
            len(image[image > thresh])), 0, debug)
    initial_peaks = []
    # Find the peaks
    labeled_image, number_of_objects = ndimage.label(image)
    peak_slices = ndimage.find_objects(labeled_image)
    for peak_slice in peak_slices:
        window = arr[peak_slice[0].start:peak_slice[0].stop]
        if peak_slice[0].stop - peak_slice[0].start > trig_int and full_peaks:
            peaks = decluster(peaks=window,
                              trig_int=trig_int,
                              index=np.arange(peak_slice[0].start,
                                              peak_slice[0].stop))
        else:
            peaks = [(window[np.argmax(abs(window))],
                      int(peak_slice[0].start + np.argmax(abs(window))))]
        initial_peaks.extend(peaks)
    peaks = decluster(peaks=np.array(list(zip(*initial_peaks))[0]),
                      index=np.array(list(zip(*initial_peaks))[1]),
                      trig_int=trig_int)
    if initial_peaks:
        if debug >= 3:
            from eqcorrscan.utils import plotting
            _fname = ''.join(
                ['peaks_',
                 starttime.datetime.strftime('%Y-%m-%d'), '.pdf'])
            plotting.peaks_plot(data=image,
                                starttime=starttime,
                                samp_rate=samp_rate,
                                save=True,
                                peaks=peaks,
                                savefile=_fname)
        peaks = sorted(peaks, key=lambda time: time[1], reverse=False)
        return peaks
    else:
        print('No peaks for you!')
        return []
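
The returned peak locations are sample indices; converting them to absolute times is left to the caller. A small hedged sketch, assuming find_peaks2_short is importable from eqcorrscan.utils.findpeaks:

import numpy as np
from obspy import UTCDateTime
from eqcorrscan.utils.findpeaks import find_peaks2_short

samp_rate = 20.0
trace_start = UTCDateTime(2017, 1, 1)
arr = np.random.randn(int(3600 * samp_rate))  # one hour of noise
arr[1000] = 15.0                              # a single clear peak
peaks = find_peaks2_short(arr, thresh=10, trig_int=int(10 * samp_rate))
# Convert sample indices to times using the trace start and sampling rate.
peak_times = [trace_start + index / samp_rate for value, index in peaks]
print(peak_times)  # [UTCDateTime(2017, 1, 1, 0, 0, 50)]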
Example #6
def _detect(detector,
            st,
            threshold,
            trig_int,
            moveout=0,
            min_trig=0,
            process=True,
            extract_detections=False,
            debug=0):
    """
    Detect within continuous data using the subspace method.

    Not to be called directly, use the detector.detect method.

    :type detector: eqcorrscan.core.subspace.Detector
    :param detector: Detector to use.
    :type st: obspy.core.stream.Stream
    :param st: Un-processed stream to detect within using the subspace \
        detector
    :type threshold: float
    :param threshold: Threshold value for detections between 0-1
    :type trig_int: float
    :param trig_int: Minimum trigger interval in seconds.
    :type moveout: float
    :param moveout: Maximum allowable moveout window for non-multiplexed,
        network detection.  See note.
    :type min_trig: int
    :param min_trig: Minimum number of stations exceeding threshold for \
        non-multiplexed, network detection. See note.
    :type process: bool
    :param process: Whether or not to process the stream according to the \
        parameters defined by the detector.  Default is to process the \
        data (True).
    :type extract_detections: bool
    :param extract_detections: Whether to extract waveforms for each \
        detection or not, if true will return detections and streams.
    :type debug: int
    :param debug: Debug output level from 0-5.

    :return: list of detections
    :rtype: list of eqcorrscan.core.match_filter.Detection
    """
    detections = []
    # First process the stream
    if process:
        debug_print('Processing Stream', 0, debug)
        stream, stachans = _subspace_process(
            streams=[st.copy()],
            lowcut=detector.lowcut,
            highcut=detector.highcut,
            filt_order=detector.filt_order,
            sampling_rate=detector.sampling_rate,
            multiplex=detector.multiplex,
            stachans=detector.stachans,
            parallel=True,
            align=False,
            shift_len=None,
            reject=False)
    else:
        # Check the sampling rate at the very least
        for tr in st:
            if not tr.stats.sampling_rate == detector.sampling_rate:
                raise ValueError('Sampling rates do not match.')
        stream = [st]
        stachans = detector.stachans
    outtic = time.perf_counter()  # time.clock() was removed in Python 3.8
    # If multiplexed, how many samples do we increment by?
    if detector.multiplex:
        Nc = len(detector.stachans)
    else:
        Nc = 1
    # Here do all ffts
    fft_vars = _do_ffts(detector, stream, Nc)
    debug_print('Computing detection statistics', 0, debug)
    debug_print('Preallocating stats matrix', 0, debug)
    stats = np.zeros(
        (len(stream[0]), (len(stream[0][0]) // Nc) - (fft_vars[4] // Nc) + 1))
    for det_freq, data_freq_sq, data_freq, i in zip(fft_vars[0], fft_vars[1],
                                                    fft_vars[2],
                                                    np.arange(len(stream[0]))):
        # Calculate det_statistic in frequency domain
        stats[i] = _det_stat_freq(det_freq, data_freq_sq, data_freq,
                                  fft_vars[3], Nc, fft_vars[4], fft_vars[5])
        debug_print('Stats matrix is shape %s' % str(stats[i].shape), 0, debug)
        if debug >= 3:
            fig, ax = plt.subplots()
            t = np.arange(len(stats[i]))
            ax.plot(t, stats[i], color='k')
            ax.axis('tight')
            ax.set_ylim([0, 1])
            ax.plot([min(t), max(t)], [threshold, threshold],
                    color='r',
                    lw=1,
                    label='Threshold')
            ax.legend()
            plt.title('%s' % str(stream[0][i].stats.station))
            plt.show()
    trig_int_samples = detector.sampling_rate * trig_int
    debug_print('Finding peaks', 0, debug)
    peaks = []
    for i in range(len(stream[0])):
        peaks.append(
            findpeaks.find_peaks2_short(arr=stats[i],
                                        thresh=threshold,
                                        trig_int=trig_int_samples,
                                        debug=debug))
    if not detector.multiplex:
        # Conduct network coincidence triggering
        peaks = findpeaks.coin_trig(peaks=peaks,
                                    samp_rate=detector.sampling_rate,
                                    moveout=moveout,
                                    min_trig=min_trig,
                                    stachans=stachans,
                                    trig_int=trig_int)
    else:
        peaks = peaks[0]
    if len(peaks) > 0:
        for peak in peaks:
            detecttime = st[0].stats.starttime + \
                (peak[1] / detector.sampling_rate)
            rid = ResourceIdentifier(id=detector.name + '_' + str(detecttime),
                                     prefix='smi:local')
            ev = Event(resource_id=rid)
            cr_i = CreationInfo(author='EQcorrscan',
                                creation_time=UTCDateTime())
            ev.creation_info = cr_i
            # All detection info in Comments for lack of a better idea
            thresh_str = 'threshold=' + str(threshold)
            ccc_str = 'detect_val=' + str(peak[0])
            used_chans = 'channels used: ' +\
                ' '.join([str(pair) for pair in detector.stachans])
            ev.comments.append(Comment(text=thresh_str))
            ev.comments.append(Comment(text=ccc_str))
            ev.comments.append(Comment(text=used_chans))
            for stachan in detector.stachans:
                tr = st.select(station=stachan[0], channel=stachan[1])
                if tr:
                    net_code = tr[0].stats.network
                else:
                    net_code = ''
                pick_tm = detecttime
                wv_id = WaveformStreamID(network_code=net_code,
                                         station_code=stachan[0],
                                         channel_code=stachan[1])
                ev.picks.append(Pick(time=pick_tm, waveform_id=wv_id))
            detections.append(
                Detection(template_name=detector.name,
                          detect_time=detecttime,
                          no_chans=len(detector.stachans),
                          detect_val=peak[0],
                          threshold=threshold,
                          typeofdet='subspace',
                          threshold_type='abs',
                          threshold_input=threshold,
                          chans=detector.stachans,
                          event=ev))
    outtoc = time.perf_counter()
    print('Detection took %s seconds' % str(outtoc - outtic))
    if extract_detections:
        detection_streams = extract_from_stream(st, detections)
        return detections, detection_streams
    return detections
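
_detect is internal; detections are normally produced via the public Detector.detect method, which wraps it. A hedged sketch - the detector and waveform files are hypothetical, and the Detector.read usage is assumed from the eqcorrscan.core.subspace API:

from obspy import read
from eqcorrscan.core import subspace

detector = subspace.Detector()
detector.read('my_detector.h5')             # hypothetical, previously built detector
st = read('/path/to/continuous_day.mseed')  # hypothetical continuous data
detections = detector.detect(st=st, threshold=0.5, trig_int=6.0)
for detection in detections:
    print(detection.detect_time, detection.detect_val)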
Example #7
def lag_calc(detections,
             detect_data,
             template_names,
             templates,
             shift_len=0.2,
             min_cc=0.4,
             horizontal_chans=['E', 'N', '1', '2'],
             vertical_chans=['Z'],
             cores=1,
             interpolate=False,
             plot=False,
             parallel=True,
             debug=0):
    """
    Main lag-calculation function for detections of specific events.

    Overseer function to take a list of detection objects and cut the data
    around them to the length of the template plus shift_len on either side.
    This will output a :class:`obspy.core.event.Catalog` of
    picked events. Pick times are based on the lag-times found at the maximum
    correlation, providing that correlation is above the min_cc.

    :type detections: list
    :param detections:
        List of :class:`eqcorrscan.core.match_filter.Detection` objects.
    :type detect_data: obspy.core.stream.Stream
    :param detect_data:
        All the data needed to cut from - can be a gappy Stream.
    :type template_names: list
    :param template_names:
        List of the template names, used to help identify families of events.
        Must be in the same order as templates.
    :type templates: list
    :param templates:
        List of the templates, templates must be a list of
        :class:`obspy.core.stream.Stream` objects.
    :type shift_len: float
    :param shift_len:
        Shift length allowed for the pick in seconds, will be plus/minus this
        amount - default=0.2
    :type min_cc: float
    :param min_cc:
        Minimum cross-correlation value to be considered a pick, default=0.4.
    :type horizontal_chans: list
    :param horizontal_chans:
        List of channel endings for horizontal-channels, on which S-picks will
        be made.
    :type vertical_chans: list
    :param vertical_chans:
        List of channel endings for vertical-channels, on which P-picks will
        be made.
    :type cores: int
    :param cores:
        Number of cores to use in parallel processing, defaults to one.
    :type interpolate: bool
    :param interpolate:
        Interpolate the correlation function to achieve sub-sample precision.
    :type plot: bool
    :param plot:
        Whether to generate a plot for every detection; defaults to False.
    :type parallel: bool
    :param parallel: Turn parallel processing on or off.
    :type debug: int
    :param debug: Debug output level, 0-5 with 5 being the most output.


    :returns:
        Catalog of events with picks.  No origin information is included.
        These events can then be written out via
        :func:`obspy.core.event.Catalog.write`, or to Nordic Sfiles using
        :func:`eqcorrscan.utils.sfile_util.eventtosfile` and located
        externally.
    :rtype: obspy.core.event.Catalog

    .. note::
        Picks output in catalog are generated relative to the template
        start-time.  For example, if you generated your template with a
        pre_pick time of 0.2 seconds, you should expect picks generated by
        lag_calc to occur 0.2 seconds before the true phase-pick.  This
        is because we do not currently store template meta-data alongside the
        templates.

    .. warning::
        Because of the above note, origin times will be consistently
        shifted by the static pre_pick applied to the templates.

    .. warning::
        This routine requires only one template per channel (e.g. you should
        not use templates with a P and S template on a single channel).  If
        this does occur an error will be raised.

    .. note::
        S-picks will be made on horizontal channels, and P picks made on
        vertical channels - the default is that horizontal channels end in
        one of: 'E', 'N', '1' or '2', and that vertical channels end in 'Z'.
        The options vertical_chans and horizontal_chans can be changed to suit
        your dataset.

    .. note::
        Individual channel cross-correlations are stored as a
        :class:`obspy.core.event.Comment` for each pick, and the summed
        cross-correlation value resulting from these is stored as a
        :class:`obspy.core.event.Comment` in the main
        :class:`obspy.core.event.Event` object.

    .. note::
        The order of events is preserved (e.g. detections[n] == output[n]),
        providing picks have been made for that event.  If no picks have
        been made for an event, it will not be included in the output.
        However, as each detection has an ID associated with it, these can
        be mapped to the output resource_id for each Event in the output
        Catalog. e.g.

            detections[n].id == output[m].resource_id

        if the output[m] is for the same event as detections[n].
    """
    if debug > 2 and plot:
        prep_plot = True
    else:
        prep_plot = False
    # First check that sample rates are equal for everything
    for tr in detect_data:
        if tr.stats.sampling_rate != detect_data[0].stats.sampling_rate:
            raise LagCalcError('Sampling rates are not equal')
    for template in templates:
        for tr in template:
            if tr.stats.sampling_rate != detect_data[0].stats.sampling_rate:
                raise LagCalcError('Sampling rates are not equal')
    # Work out the delays for each template
    delays = []  # List of tuples of (tempname, (sta, chan, delay))
    zipped_templates = list(zip(template_names, templates))
    detect_stachans = [(tr.stats.station, tr.stats.channel)
                       for tr in detect_data]
    for template in zipped_templates:
        temp_delays = {}
        # Remove channels not present in continuous data
        _template = template[1].copy()
        for tr in _template:
            if (tr.stats.station, tr.stats.channel) not in detect_stachans:
                _template.remove(tr)
        for tr in _template:
            temp_delays.update({
                tr.stats.station + '.' + tr.stats.channel:
                tr.stats.starttime -
                _template.sort(['starttime'])[0].stats.starttime
            })
        delays.append((template[0], temp_delays))
        del _template
    # Segregate detections by template, then feed to day_loop
    initial_cat = Catalog()
    for template in zipped_templates:
        print('Running lag-calc for template %s' % template[0])
        template_detections = [
            detection for detection in detections
            if detection.template_name == template[0]
        ]
        t_delays = [d for d in delays if d[0] == template[0]][0][1]
        debug_print('There are %i detections' % len(template_detections), 2,
                    debug)
        detect_streams = _prepare_data(detect_data=detect_data,
                                       detections=template_detections,
                                       template=template,
                                       delays=t_delays,
                                       shift_len=shift_len,
                                       plot=prep_plot)
        detect_streams = [detect_stream[1] for detect_stream in detect_streams]
        if len(template_detections) > 0:
            template_cat = _day_loop(detection_streams=detect_streams,
                                     template=template[1],
                                     min_cc=min_cc,
                                     detections=template_detections,
                                     horizontal_chans=horizontal_chans,
                                     vertical_chans=vertical_chans,
                                     interpolate=interpolate,
                                     cores=cores,
                                     parallel=parallel,
                                     debug=debug)
            initial_cat += template_cat
            if plot:
                for i, event in enumerate(template_cat):
                    if len(event.picks) == 0:
                        continue
                    plot_stream = detect_streams[i].copy()
                    template_plot = template[1].copy()
                    pick_stachans = [(pick.waveform_id.station_code,
                                      pick.waveform_id.channel_code)
                                     for pick in event.picks]
                    for tr in plot_stream:
                        if (tr.stats.station, tr.stats.channel) \
                                not in pick_stachans:
                            plot_stream.remove(tr)
                    for tr in template_plot:
                        if (tr.stats.station, tr.stats.channel) \
                                not in pick_stachans:
                            template_plot.remove(tr)
                    plot_repicked(template=template_plot,
                                  picks=event.picks,
                                  det_stream=plot_stream)
    # Order the catalogue to match the input
    output_cat = Catalog()
    for det in detections:
        event = [e for e in initial_cat if str(e.resource_id) == str(det.id)]
        if len(event) == 1:
            output_cat.append(event[0])
        elif len(event) == 0:
            print('No picks made for detection: \n%s' % det.__str__())
        else:
            raise NotImplementedError('Multiple events with same id,'
                                      ' should not happen')
    return output_cat
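
A minimal sketch of the delay bookkeeping used above: the delay for each channel is its trace start time minus the earliest start time in the template. The snippet below uses obspy's bundled example stream as a stand-in template (the variable names are illustrative only, and obspy is assumed to be installed):

from obspy import read

template_st = read()  # obspy's example stream, standing in for a template
earliest = min(tr.stats.starttime for tr in template_st)
delays = {tr.stats.station + '.' + tr.stats.channel:
          tr.stats.starttime - earliest
          for tr in template_st}
# All three example traces share a start time, so every delay here is 0.0
print(delays)
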
Example #8
def _day_loop(detection_streams,
              template,
              min_cc,
              detections,
              horizontal_chans,
              vertical_chans,
              interpolate,
              cores,
              parallel,
              debug=0):
    """
    Function to loop through multiple detections for one template.

    Designed to run for the same day of data for I/O simplicity, but as you
    are passing stream objects it could run for all the detections ever, as
    long as you have the RAM!

    :type detection_streams: list
    :param detection_streams:
        List of all the detections for this template for which you want to
        compute the optimum picks. Individual items in the list should be of
        :class:`obspy.core.stream.Stream` type.
    :type template: obspy.core.stream.Stream
    :param template: The original template used to make these detections.
    :type min_cc: float
    :param min_cc: Minimum cross-correlation value to be allowed for a pick.
    :type detections: list
    :param detections:
        List of Detection objects used to associate each output event with its
        input detection.
    :type horizontal_chans: list
    :param horizontal_chans:
        List of channel endings for horizontal-channels, on which S-picks will
        be made.
    :type vertical_chans: list
    :param vertical_chans:
        List of channel endings for vertical-channels, on which P-picks will
        be made.
    :type interpolate: bool
    :param interpolate:
        Interpolate the correlation function to achieve sub-sample precision.
    :type cores: int
    :param cores: Number of cores to use for parallel processing; if None,
        all available cores will be used.
    :type parallel: bool
    :param parallel: Whether to run the channel loops in parallel.
    :type debug: int
    :param debug: Debug output level 0-5.

    :returns:
        Catalog object containing Event objects for each detection created by
        this template.
    :rtype: :class:`obspy.core.event.Catalog`
    """
    if len(detection_streams) == 0:
        return Catalog()
    if not cores:
        num_cores = cpu_count()
    else:
        num_cores = cores
    if num_cores > len(detection_streams):
        num_cores = len(detection_streams)
    if parallel:
        pool = Pool(processes=num_cores)
        debug_print('Made pool of %i workers' % num_cores, 4, debug)
        # Parallel generation of events for each detection:
        # results will be a list of (i, event class)
        results = [
            pool.apply_async(
                _channel_loop, (detection_streams[i], ), {
                    'template': template,
                    'min_cc': min_cc,
                    'detection_id': detections[i].id,
                    'interpolate': interpolate,
                    'i': i,
                    'pre_lag_ccsum': detections[i].detect_val,
                    'detect_chans': detections[i].no_chans,
                    'horizontal_chans': horizontal_chans,
                    'vertical_chans': vertical_chans
                }) for i in range(len(detection_streams))
        ]
        pool.close()
        events_list = [p.get() for p in results]
        pool.join()
        events_list.sort(key=lambda tup: tup[0])  # Sort based on index.
    else:
        events_list = []
        for i in range(len(detection_streams)):
            events_list.append(
                _channel_loop(detection=detection_streams[i],
                              template=template,
                              min_cc=min_cc,
                              detection_id=detections[i].id,
                              interpolate=interpolate,
                              i=i,
                              pre_lag_ccsum=detections[i].detect_val,
                              detect_chans=detections[i].no_chans,
                              horizontal_chans=horizontal_chans,
                              vertical_chans=vertical_chans,
                              debug=debug))
    temp_catalog = Catalog()
    temp_catalog.events = [event_tup[1] for event_tup in events_list]
    return temp_catalog
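
The parallel branch of _day_loop follows a common multiprocessing pattern: each job returns an (index, result) tuple so that, once the pool has finished, results can be sorted back into input order. A minimal, self-contained sketch of that pattern, where the square worker is a hypothetical stand-in for _channel_loop:

from multiprocessing import Pool, cpu_count

def square(x, i):
    # Return the index alongside the result so ordering can be restored
    return i, x * x

if __name__ == '__main__':
    data = [3, 1, 4, 1, 5]
    with Pool(processes=min(cpu_count(), len(data))) as pool:
        results = [pool.apply_async(square, (x,), {'i': i})
                   for i, x in enumerate(data)]
        results = [r.get() for r in results]
    results.sort(key=lambda tup: tup[0])  # Sort based on index
    print([value for _, value in results])  # [9, 1, 16, 1, 25]
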
Example #9
def _channel_loop(detection,
                  template,
                  min_cc,
                  detection_id,
                  interpolate,
                  i,
                  pre_lag_ccsum=None,
                  detect_chans=0,
                  horizontal_chans=['E', 'N', '1', '2'],
                  vertical_chans=['Z'],
                  debug=0):
    """
    Inner loop for correlating and assigning picks.

    Utility function to take a stream of data for the detected event and write
    maximum correlation to absolute time as picks in an obspy.core.event.Event
    object.
    Only outputs picks with cross-correlation above min_cc.

    :type detection: obspy.core.stream.Stream
    :param detection:
        Stream of data for the slave event detected using template.
    :type template: obspy.core.stream.Stream
    :param template: Stream of data as the template for the detection.
    :type min_cc: float
    :param min_cc: Minimum cross-correlation value to allow a pick to be made.
    :type detection_id: str
    :param detection_id: Detection ID to associate the event with.
    :type interpolate: bool
    :param interpolate:
        Interpolate the correlation function to achieve sub-sample precision.
    :type i: int
    :param i:
        Index used to track which detection this result belongs to when
        running in parallel.
    :type pre_lag_ccsum: float
    :param pre_lag_ccsum:
        Cross-correlation sum before lag-calc; used to check that lag-calc
        does not significantly decrease the cross-correlation sum (summed over
        all channels, ignoring min_cc).
    :type detect_chans: int
    :param detect_chans:
        Number of channels originally used in detections, must match the number
        used here to allow for cccsum checking.
    :type horizontal_chans: list
    :param horizontal_chans:
        List of channel endings for horizontal-channels, on which S-picks will
        be made.
    :type vertical_chans: list
    :param vertical_chans:
        List of channel endings for vertical-channels, on which P-picks will
        be made.
    :type debug: int
    :param debug: Debug output level 0-5.

    :returns:
        Event object containing network, station, channel and pick information.
    :rtype: :class:`obspy.core.event.Event`
    """
    from eqcorrscan.core.match_filter import normxcorr2
    event = Event()
    s_stachans = {}
    cccsum = 0
    checksum = 0
    used_chans = 0
    for tr in template:
        temp_net = tr.stats.network
        temp_sta = tr.stats.station
        temp_chan = tr.stats.channel
        debug_print('Working on: %s.%s.%s' % (temp_net, temp_sta, temp_chan),
                    3, debug)
        image = detection.select(station=temp_sta, channel=temp_chan)
        if len(image) == 0:
            print('No match in image.')
            continue
        if interpolate:
            try:
                ccc = normxcorr2(tr.data, image[0].data)
            except Exception:
                print('Could not calculate cc')
                print('Image is %i long' % len(image[0].data))
                print('Template is %i long' % len(tr.data))
                continue
            try:
                shift, cc_max = _xcorr_interp(ccc=ccc, dt=image[0].stats.delta)
            except IndexError:
                print('Could not interpolate ccc, not smooth')
                ccc = normxcorr2(tr.data, image[0].data)
                cc_max = np.amax(ccc)
                shift = np.argmax(ccc) * image[0].stats.delta
            # Convert the maximum cross-correlation time to an actual time
            picktime = image[0].stats.starttime + shift
        else:
            # Convert the maximum cross-correlation time to an actual time
            try:
                ccc = normxcorr2(tr.data, image[0].data)
            except Exception:
                print('Could not calculate cc')
                print('Image is %i long' % len(image[0].data))
                print('Template is %i long' % len(tr.data))
                continue
            cc_max = np.amax(ccc)
            picktime = image[0].stats.starttime + (np.argmax(ccc) *
                                                   image[0].stats.delta)
        debug_print('Maximum cross-corr=%s' % cc_max, 3, debug)
        checksum += cc_max
        used_chans += 1
        if cc_max < min_cc:
            debug_print('Correlation below threshold, not used', 3, debug)
            continue
        cccsum += cc_max
        # Perhaps weight each pick by the cc val or cc val^2?
        # weight = np.amax(ccc) ** 2
        if temp_chan[-1] in vertical_chans:
            phase = 'P'
        # Only take the S-pick with the best correlation
        elif temp_chan[-1] in horizontal_chans:
            phase = 'S'
            debug_print(
                'Making S-pick on: %s.%s.%s' % (temp_net, temp_sta, temp_chan),
                4, debug)
            if temp_sta not in s_stachans.keys():
                s_stachans[temp_sta] = (temp_chan, np.amax(ccc), picktime)
            elif np.amax(ccc) <= s_stachans[temp_sta][1]:
                # An S-pick with a higher correlation already exists for this
                # station, so skip this channel
                continue
        else:
            phase = None
        _waveform_id = WaveformStreamID(network_code=temp_net,
                                        station_code=temp_sta,
                                        channel_code=temp_chan)
        event.picks.append(
            Pick(waveform_id=_waveform_id,
                 time=picktime,
                 method_id=ResourceIdentifier('EQcorrscan'),
                 phase_hint=phase,
                 creation_info='eqcorrscan.core.lag_calc',
                 comments=[Comment(text='cc_max=%s' % cc_max)]))
        event.resource_id = detection_id
    ccc_str = ("detect_val=%s" % cccsum)
    event.comments.append(Comment(text=ccc_str))
    if used_chans == detect_chans:
        if pre_lag_ccsum is not None and\
           checksum - pre_lag_ccsum < -(0.30 * pre_lag_ccsum):
            msg = ('lag-calc has decreased cccsum from %f to %f' %
                   (pre_lag_ccsum, checksum))
            # warnings.warn(msg)
            raise LagCalcError(msg)
    else:
        warnings.warn('Cannot check if cccsum is better, used %i channels '
                      'for detection, but %i are used here' %
                      (detect_chans, used_chans))
    return i, event
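
The core of _channel_loop reduces to: correlate each template trace against the matching continuous-data window, take the maximum normalized correlation, and convert the index of that maximum into an absolute pick time. A minimal sketch of that conversion, assuming a plain numpy normalized cross-correlation as a stand-in for eqcorrscan's normxcorr2 (all names and values here are illustrative):

import numpy as np
from obspy import UTCDateTime

def norm_xcorr(template, image):
    """Normalized cross-correlation of template slid along image."""
    n = len(template)
    template = (template - template.mean()) / (template.std() * n)
    ccc = np.empty(len(image) - n + 1)
    for k in range(len(ccc)):
        window = image[k:k + n]
        ccc[k] = np.sum(template * (window - window.mean())) / window.std()
    return ccc

delta = 0.01                         # sample interval in seconds
starttime = UTCDateTime(2017, 1, 1)  # start time of the continuous trace
image = np.random.randn(1000)
template = image[400:500].copy()     # template known to sit at sample 400
ccc = norm_xcorr(template, image)
cc_max = np.amax(ccc)
# Same argmax-to-time conversion as used in _channel_loop above
picktime = starttime + np.argmax(ccc) * delta
print(cc_max, picktime)              # cc_max ~ 1.0, pick at starttime + 4.0 s
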