Example #1
    def test_interp_not_enough_samples(self):
        """_xcorr_interp should raise an IndexError when the ccc is too short."""
        synth_template = np.sin(np.arange(0, 2, 0.001))
        synth_detection = synth_template[11:]
        synth_template = synth_template[0:-10]
        # len(ccc) == 1990 - 1989 + 1 == 2: too few samples to interpolate
        ccc = normxcorr2(synth_detection, synth_template)[0]
        with self.assertRaises(IndexError):
            _xcorr_interp(ccc, 0.01)
Example #2
    def test_bad_interp(self):
        ccc = np.array([
            -0.21483282, -0.59443731, 0.1898917, -0.67516038, 0.60129057,
            -0.71043723, 0.16709118, 0.96839009, 1.58283915, -0.3053663
        ])

        _xcorr_interp(ccc, 0.1)
        self.assertEqual(len(self.log_messages['warning']), 1)
        self.assertTrue(
            'not give an accurate result' in self.log_messages['warning'][0])
Example #3
    def test_bad_interp(self):
        ccc = np.array([
            -0.21483282, -0.59443731, 0.1898917, -0.67516038, 0.60129057,
            -0.71043723, 0.16709118, 0.96839009, 1.58283915, -0.3053663
        ])
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            _xcorr_interp(ccc, 0.1)
        # Debug output: show the captured warnings
        for _w in w:
            print(_w.message)
        self.assertEqual(len(w), 2)
        self.assertTrue('Less than 5 samples' in str(w[0].message))
        self.assertTrue('Residual in quadratic fit' in str(w[1].message))
Example #4
    def test_interp_few_samples(self):
        synth_template = np.sin(np.arange(0, 2, 0.001))
        synth_detection = synth_template[13:]
        synth_template = synth_template[0:-10]
        # Only four correlation samples (1990 - 1987 + 1), but still usable
        ccc = normxcorr2(synth_detection, synth_template)
        shift, coeff = lag_calc._xcorr_interp(ccc, 0.01)
        self.assertEqual(shift.round(), 0.0)
        self.assertEqual(coeff.round(), 1.0)
Example #5
    def test_interp_normal(self):
        synth_template = np.sin(np.arange(0, 4, 0.01))
        image = np.zeros(1000)
        # Spike at sample 200 -> expected lag of 200 * 0.01 = 2.0 s
        image[200] = 1
        image = np.convolve(image, synth_template)
        ccc = normxcorr2(synth_template, image)
        shift, coeff = _xcorr_interp(ccc, 0.01)
        self.assertEqual(shift.round(), 2.0)
        self.assertEqual(coeff.round(), 1.0)
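
Examples #3 to #5 all exercise `_xcorr_interp`, which refines the discrete cross-correlation maximum to sub-sample precision by fitting a quadratic around the peak (hence the 'Residual in quadratic fit' warning in Example #3). The following is a minimal sketch of that idea, assuming a plain three-point parabolic fit rather than EQcorrscan's actual implementation; `quadratic_peak_interp` is a hypothetical name.

import numpy as np


def quadratic_peak_interp(ccc, dt):
    """Refine the discrete peak of a cross-correlation function by fitting
    a parabola through the peak sample and its two neighbours.

    Returns (shift in seconds, interpolated peak value). Hypothetical
    sketch - not EQcorrscan's _xcorr_interp."""
    ccc = np.asarray(ccc, dtype=float).ravel()
    peak = int(np.argmax(ccc))
    if peak == 0 or peak == len(ccc) - 1:
        # Peak on an edge: no neighbours to fit, fall back to the sample max
        return peak * dt, float(ccc[peak])
    y0, y1, y2 = ccc[peak - 1], ccc[peak], ccc[peak + 1]
    denom = y0 - 2.0 * y1 + y2
    if denom == 0.0:  # Degenerate (flat) region: no curvature to fit
        return peak * dt, float(y1)
    # Vertex of the parabola through (-1, y0), (0, y1), (1, y2)
    offset = 0.5 * (y0 - y2) / denom
    coeff = y1 - 0.25 * (y0 - y2) * offset
    return (peak + offset) * dt, float(coeff)

Run against the spike test in Example #5, a fit like this should land close to the asserted values (a shift rounding to 2.0 s and a coefficient rounding to 1.0).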
Example #6
def _compute_dt_correlations(catalog,
                             master,
                             min_link,
                             event_id_mapper,
                             stream_dict,
                             min_cc,
                             extract_len,
                             pre_pick,
                             shift_len,
                             interpolate,
                             max_workers=1):
    """ Compute cross-correlation delay times. """
    max_workers = max_workers or 1
    Logger.info(
        f"Correlating {master.resource_id.id} with {len(catalog)} events")
    differential_times_dict = dict()
    master_stream = _prepare_stream(stream=stream_dict[master.resource_id.id],
                                    event=master,
                                    extract_len=extract_len,
                                    pre_pick=pre_pick)
    available_seed_ids = {tr.id for st in master_stream.values() for tr in st}
    Logger.debug(f"The channels provided are: {available_seed_ids}")
    master_seed_ids = {
        SeedPickID(pick.waveform_id.get_seed_string(), pick.phase_hint[0])
        for pick in master.picks if pick.phase_hint[0] in "PS"
        and pick.waveform_id.get_seed_string() in available_seed_ids
    }
    Logger.debug(f"Using channels: {master_seed_ids}")
    # Dictionary of travel-times for master keyed by {station}_{phase_hint}
    master_tts = dict()
    master_origin_time = (master.preferred_origin() or master.origins[0]).time
    for pick in master.picks:
        if pick.phase_hint[0] not in "PS":
            continue
        tt1 = pick.time - master_origin_time
        master_tts.update({
            "{0}_{1}".format(pick.waveform_id.station_code, pick.phase_hint[0]):
            tt1
        })

    matched_length = extract_len + (2 * shift_len)
    matched_pre_pick = pre_pick + shift_len
    # We will use this to maintain order
    event_dict = {event.resource_id.id: event for event in catalog}
    event_ids = set(event_dict.keys())
    # Check for overlap
    _stream_event_ids = set(stream_dict.keys())
    if len(event_ids.difference(_stream_event_ids)):
        Logger.warning(
            f"Missing streams for {event_ids.difference(_stream_event_ids)}")
        # Just use the event ids that we actually have streams for!
        event_ids = event_ids.intersection(_stream_event_ids)
    matched_streams = {
        event_id: _prepare_stream(stream=stream_dict[event_id],
                                  event=event_dict[event_id],
                                  extract_len=matched_length,
                                  pre_pick=matched_pre_pick,
                                  seed_pick_ids=master_seed_ids)
        for event_id in event_ids
    }

    sampling_rates = {
        tr.stats.sampling_rate
        for st in master_stream.values() for tr in st
    }
    for phase_hint in master_stream.keys():  # Loop over P and S separately
        for sampling_rate in sampling_rates:  # Loop over separate samp rates
            delta = 1.0 / sampling_rate
            _master_stream = master_stream[phase_hint].select(
                sampling_rate=sampling_rate)
            if len(_master_stream) == 0:
                continue
            _matched_streams = dict()
            for key, value in matched_streams.items():
                _st = value[phase_hint].select(sampling_rate=sampling_rate)
                if len(_st) > 0:
                    _matched_streams.update({key: _st})
            if len(_matched_streams) == 0:
                Logger.info("No matching data for {0}, {1} phase".format(
                    master.resource_id.id, phase_hint))
                continue
            # Check lengths
            master_length = [tr.stats.npts for tr in _master_stream]
            if len(set(master_length)) > 1:
                Logger.warning("Multiple lengths found - check that you "
                               "are providing sufficient data")
            master_length = Counter(master_length).most_common(1)[0][0]
            _master_stream = _master_stream.select(npts=master_length)
            matched_length = Counter((tr.stats.npts
                                      for st in _matched_streams.values()
                                      for tr in st))
            if len(matched_length) > 1:
                Logger.warning("Multiple lengths of stream found - taking "
                               "the most common. Check that you are "
                               "providing sufficient data")
            matched_length = matched_length.most_common(1)[0][0]
            if matched_length < master_length:
                Logger.error("Matched streams are shorter than the master, "
                             "will not correlate")
                continue
            # Remove empty streams and generate an ordered list of event_ids
            used_event_ids, used_matched_streams = [], []
            for event_id, _matched_stream in _matched_streams.items():
                _matched_stream = _matched_stream.select(npts=matched_length)
                if len(_matched_stream) > 0:
                    used_event_ids.append(event_id)
                    used_matched_streams.append(_matched_stream)
            # Check that there are matching seed ids.
            master_seed_ids = set(tr.id for tr in _master_stream)
            matched_seed_ids = set(tr.id for st in used_matched_streams
                                   for tr in st)
            if not matched_seed_ids.issubset(master_seed_ids):
                Logger.warning(
                    "After checking length there are no matched traces: "
                    f"master: {master_seed_ids}, matched: {matched_seed_ids}")
                continue
            # Do the correlations
            Logger.debug(
                f"Correlating channels: {[tr.id for tr in _master_stream]}")
            ccc_out, used_chans = _concatenate_and_correlate(
                template=_master_stream,
                streams=used_matched_streams,
                cores=max_workers)
            # Convert ccc_out to pick-time
            for i, used_event_id in enumerate(used_event_ids):
                for j, chan in enumerate(used_chans[i]):
                    if not chan.used:
                        continue
                    correlation = ccc_out[i][j]
                    if interpolate:
                        shift, cc_max = _xcorr_interp(correlation, dt=delta)
                    else:
                        cc_max = np.amax(correlation)
                        shift = np.argmax(correlation) * delta
                    if cc_max < min_cc:
                        continue
                    shift -= shift_len
                    pick = [
                        p for p in event_dict[used_event_id].picks
                        if p.phase_hint[0] == phase_hint
                        and p.waveform_id.station_code == chan.channel[0]
                        and p.waveform_id.channel_code == chan.channel[1]
                    ]
                    pick = sorted(pick, key=lambda p: p.time)[0]
                    tt2 = pick.time - (
                        event_dict[used_event_id].preferred_origin()
                        or event_dict[used_event_id].origins[0]).time
                    tt2 += shift
                    diff_time = differential_times_dict.get(
                        used_event_id, None)
                    if diff_time is None:
                        diff_time = _EventPair(
                            event_id_1=event_id_mapper[master.resource_id.id],
                            event_id_2=event_id_mapper[used_event_id])
                    diff_time.obs.append(
                        _DTObs(station=chan.channel[0],
                               tt1=master_tts["{0}_{1}".format(
                                   chan.channel[0], phase_hint)],
                               tt2=tt2,
                               weight=cc_max**2,
                               phase=phase_hint[0]))
                    differential_times_dict.update({used_event_id: diff_time})
    # Threshold on min_link
    differential_times = [
        dt for dt in differential_times_dict.values()
        if len(dt.obs) >= min_link
    ]
    return differential_times
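
Note how the loop over `used_chans` turns a correlation function into a pick-time correction: the matched windows are cut `shift_len` seconds earlier than the master window (`matched_pre_pick = pre_pick + shift_len`), so the raw peak position has `shift_len` subtracted before being added to the travel time `tt2`. A short worked example with made-up numbers:

import numpy as np

delta = 0.01      # 100 Hz data, so one sample is 0.01 s
shift_len = 0.2   # matched windows start 0.2 s before the master window

# A synthetic correlation function with its peak at sample 22
correlation = np.exp(-0.5 * ((np.arange(41) - 22) / 3.0) ** 2)

cc_max = np.amax(correlation)           # 1.0
shift = np.argmax(correlation) * delta  # 0.22 s from the matched-window start
shift -= shift_len                      # net correction: +0.02 s (pick moves later)
# tt2 for this observation becomes (pick.time - origin time) + shift,
# and the observation is weighted by cc_max ** 2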
Example #7
def _compute_dt_correlations(catalog, master, min_link, event_id_mapper,
                             stream_dict, min_cc, extract_len, pre_pick,
                             shift_len, interpolate):
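    """Compute cross-correlation delay times between a master event and catalog."""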
    differential_times_dict = dict()
    master_stream = _prepare_stream(
        stream=stream_dict[master.resource_id.id], event=master,
        extract_len=extract_len, pre_pick=pre_pick)
    available_seed_ids = {tr.id for st in master_stream.values() for tr in st}
    master_seed_ids = {
        SeedPickID(pick.waveform_id.get_seed_string(), pick.phase_hint[0])
        for pick in master.picks if
        pick.phase_hint[0] in "PS" and
        pick.waveform_id.get_seed_string() in available_seed_ids}
    # Dictionary of travel-times for master keyed by {station}_{phase_hint}
    master_tts = dict()
    master_origin_time = (master.preferred_origin() or master.origins[0]).time
    for pick in master.picks:
        if pick.phase_hint[0] not in "PS":
            continue
        tt1 = pick.time - master_origin_time
        master_tts.update({
            "{0}_{1}".format(
                pick.waveform_id.station_code, pick.phase_hint[0]): tt1})

    matched_length = extract_len + (2 * shift_len)
    matched_pre_pick = pre_pick + shift_len
    # We will use this to maintain order
    event_dict = {event.resource_id.id: event for event in catalog}
    event_ids = list(event_dict.keys())
    matched_streams = {
        event_id: _prepare_stream(
            stream=stream_dict[event_id], event=event_dict[event_id],
            extract_len=matched_length, pre_pick=matched_pre_pick,
            seed_pick_ids=master_seed_ids)
        for event_id in event_ids}

    sampling_rates = {tr.stats.sampling_rate for st in master_stream.values()
                      for tr in st}
    for phase_hint in master_stream.keys():  # Loop over P and S separately
        for sampling_rate in sampling_rates:  # Loop over separate samp rates
            delta = 1.0 / sampling_rate
            _master_stream = master_stream[phase_hint].select(
                sampling_rate=sampling_rate)
            _matched_streams = dict()
            for key, value in matched_streams.items():
                _st = value[phase_hint].select(sampling_rate=sampling_rate)
                if len(_st) > 0:
                    _matched_streams.update({key: _st})
            if len(_matched_streams) == 0:
                Logger.info("No matching data for {0}, {1} phase".format(
                    master.resource_id.id, phase_hint))
                continue
            # Check lengths
            master_length = Counter(
                (tr.stats.npts for tr in _master_stream)).most_common(1)[0][0]
            _master_stream = _master_stream.select(npts=master_length)
            matched_length = Counter(
                (tr.stats.npts for st in _matched_streams.values()
                 for tr in st)).most_common(1)[0][0]
            # Remove empty streams and generate an ordered list of event_ids
            used_event_ids, used_matched_streams = [], []
            for event_id, _matched_stream in _matched_streams.items():
                _matched_stream = _matched_stream.select(npts=matched_length)
                if len(_matched_stream) > 0:
                    used_event_ids.append(event_id)
                    used_matched_streams.append(_matched_stream)
            ccc_out, used_chans = _concatenate_and_correlate(
                template=_master_stream, streams=used_matched_streams, cores=1)
            # Convert ccc_out to pick-time
            for i, used_event_id in enumerate(used_event_ids):
                for j, chan in enumerate(used_chans[i]):
                    if not chan.used:
                        continue
                    correlation = ccc_out[i][j]
                    if interpolate:
                        shift, cc_max = _xcorr_interp(correlation, dt=delta)
                    else:
                        cc_max = np.amax(correlation)
                        shift = np.argmax(correlation) * delta
                    if cc_max < min_cc:
                        continue
                    shift -= shift_len
                    pick = [p for p in event_dict[used_event_id].picks
                            if p.phase_hint == phase_hint
                            and p.waveform_id.station_code == chan.channel[0]
                            and p.waveform_id.channel_code == chan.channel[1]]
                    pick = sorted(pick, key=lambda p: p.time)[0]
                    tt2 = pick.time - (
                            event_dict[used_event_id].preferred_origin() or
                            event_dict[used_event_id].origins[0]).time
                    tt2 += shift
                    diff_time = differential_times_dict.get(
                        used_event_id, None)
                    if diff_time is None:
                        diff_time = _EventPair(
                            event_id_1=event_id_mapper[master.resource_id.id],
                            event_id_2=event_id_mapper[used_event_id])
                    diff_time.obs.append(
                        _DTObs(station=chan.channel[0],
                               tt1=master_tts["{0}_{1}".format(
                                   chan.channel[0], phase_hint)],
                               tt2=tt2, weight=cc_max ** 2, phase=phase_hint))
                    differential_times_dict.update({used_event_id: diff_time})
    # Threshold on min_link
    differential_times = [dt for dt in differential_times_dict.values()
                          if len(dt.obs) >= min_link]
    return differential_times