def _find_peaks_c(array, threshold):
    """
    Use a C func to find peaks in the array.
    """
    utilslib = _load_cdll('libutils')

    length = array.shape[0]
    utilslib.find_peaks.argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32, shape=(length, ),
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long, ctypes.c_float,
        np.ctypeslib.ndpointer(dtype=np.uint32, shape=(length, ),
                               flags=native_str('C_CONTIGUOUS'))]
    utilslib.find_peaks.restype = ctypes.c_int

    arr = np.ascontiguousarray(array, np.float32)
    out = np.ascontiguousarray(np.zeros((length, ), dtype=np.uint32))

    ret = utilslib.find_peaks(arr, ctypes.c_long(length), threshold, out)
    if ret != 0:
        raise MemoryError("Internal error")

    peaks_locations = np.nonzero(out)
    return array[peaks_locations], peaks_locations[0]


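# Hypothetical usage sketch (names and values illustrative only): flag
# samples that the C routine identifies as peaks above an absolute
# threshold.
#
#     data = np.random.randn(86400).astype(np.float32)
#     peak_values, peak_samples = _find_peaks_c(data, threshold=5.0)

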
def dist_calc(loc1, loc2):
    """
    Function to calculate the distance in km between two points.

    Uses the `haversine formula
    <https://en.wikipedia.org/wiki/Haversine_formula>`_ to calculate great
    circle distance at the Earth's surface, then uses trig to include depth.

    :type loc1: tuple
    :param loc1: Tuple of lat, lon, depth (in decimal degrees and km)
    :type loc2: tuple
    :param loc2: Tuple of lat, lon, depth (in decimal degrees and km)

    :returns: Distance between points in km.
    :rtype: float
    """
    from eqcorrscan.utils.libnames import _load_cdll
    import ctypes

    utilslib = _load_cdll('libutils')

    utilslib.dist_calc.argtypes = [
        ctypes.c_float, ctypes.c_float, ctypes.c_float,
        ctypes.c_float, ctypes.c_float, ctypes.c_float]
    utilslib.dist_calc.restype = ctypes.c_float

    dist = utilslib.dist_calc(
        float(math.radians(loc1[0])), float(math.radians(loc1[1])),
        float(loc1[2]),
        float(math.radians(loc2[0])), float(math.radians(loc2[1])),
        float(loc2[2]))
    return dist


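# A minimal pure-Python reference for the same computation, handy for
# sanity-checking the C routine on a few points. This sketch assumes the
# depth term is combined with the epicentral (haversine) distance by
# Pythagoras; it is an illustration, not the library's C implementation,
# and the helper name _dist_calc_py is hypothetical.
def _dist_calc_py(loc1, loc2, earth_radius=6371.0):
    """Hypocentral distance in km between two (lat, lon, depth_km) tuples."""
    lat1, lon1, depth1 = math.radians(loc1[0]), math.radians(loc1[1]), loc1[2]
    lat2, lon2, depth2 = math.radians(loc2[0]), math.radians(loc2[1]), loc2[2]
    # Haversine great-circle (epicentral) distance at the Earth's surface
    a = (math.sin((lat2 - lat1) / 2) ** 2 +
         math.cos(lat1) * math.cos(lat2) * math.sin((lon2 - lon1) / 2) ** 2)
    epicentral = 2 * earth_radius * math.asin(math.sqrt(a))
    # Combine surface separation and depth difference
    return math.hypot(epicentral, depth2 - depth1)

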
def dist_mat_km(catalog, num_threads=None):
    """
    Compute the distance matrix for a catalog using hypocentral separation.

    Will give physical distance in kilometers.

    :type catalog: obspy.core.event.Catalog
    :param catalog: Catalog for which to compute the distance matrix

    :returns: distance matrix
    :rtype: :class:`numpy.ndarray`
    """
    import ctypes
    from eqcorrscan.utils.libnames import _load_cdll
    from future.utils import native_str

    utilslib = _load_cdll('libutils')

    utilslib.distance_matrix.argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long,
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int]
    utilslib.distance_matrix.restype = ctypes.c_int

    # Initialize square matrix
    dist_mat = np.zeros((len(catalog), len(catalog)), dtype=np.float32)
    latitudes, longitudes, depths = (
        np.empty(len(catalog)), np.empty(len(catalog)),
        np.empty(len(catalog)))
    for i, event in enumerate(catalog):
        origin = event.preferred_origin() or event.origins[0]
        latitudes[i] = origin.latitude
        longitudes[i] = origin.longitude
        depths[i] = origin.depth / 1000
    depths = np.ascontiguousarray(depths, dtype=np.float32)
    latitudes = np.ascontiguousarray(
        np.radians(latitudes), dtype=np.float32)
    longitudes = np.ascontiguousarray(
        np.radians(longitudes), dtype=np.float32)

    if num_threads is None:
        # Testing showed that 400 events per thread was best on the i7.
        num_threads = int(min(cpu_count(), len(catalog) // 400))
    if num_threads == 0:
        num_threads = 1

    ret = utilslib.distance_matrix(
        latitudes, longitudes, depths, len(catalog), dist_mat, num_threads)
    if ret != 0:  # pragma: no cover
        raise Exception("Internal error while computing distance matrix")
    # Fill distance matrix
    out = dist_mat.T + dist_mat
    return out


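# Hypothetical usage sketch (illustrative only): the returned matrix is
# symmetric with zeros on the diagonal, in km.
#
#     from obspy import read_events
#     catalog = read_events()  # obspy's bundled example catalog
#     distances = dist_mat_km(catalog)
#     assert np.allclose(distances, distances.T)

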
def decluster(peaks, index, trig_int, threshold=0):
    """
    Decluster peaks based on an enforced minimum separation.

    :type peaks: np.array
    :param peaks: array of peak values
    :type index: np.ndarray
    :param index: locations of peaks
    :type trig_int: int
    :param trig_int: Minimum trigger interval in samples
    :type threshold: float
    :param threshold: Minimum absolute peak value to retain it.

    :return: list of tuples of (value, sample)
    """
    utilslib = _load_cdll('libutils')

    length = peaks.shape[0]
    trig_int = int(trig_int)
    # Use the widest integer type needed by any value passed to C.
    max_var = max(int(index.max()), trig_int)
    if max_var == ctypes.c_long(max_var).value:
        long_type = ctypes.c_long
        func = utilslib.decluster
    elif max_var == ctypes.c_longlong(max_var).value:
        long_type = ctypes.c_longlong
        func = utilslib.decluster_ll
    else:
        raise OverflowError("Maximum index larger than internal long long")

    func.argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32, shape=(length, ),
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=long_type, shape=(length, ),
                               flags=native_str('C_CONTIGUOUS')),
        long_type, ctypes.c_float, long_type,
        np.ctypeslib.ndpointer(dtype=np.uint32, shape=(length, ),
                               flags=native_str('C_CONTIGUOUS'))]
    func.restype = ctypes.c_int

    sorted_inds = np.abs(peaks).argsort()
    arr = peaks[sorted_inds[::-1]]
    inds = index[sorted_inds[::-1]]
    arr = np.ascontiguousarray(arr, dtype=np.float32)
    inds = np.ascontiguousarray(inds, dtype=long_type)
    out = np.zeros(len(arr), dtype=np.uint32)

    ret = func(arr, inds, long_type(length), np.float32(threshold),
               long_type(trig_int), out)
    if ret != 0:
        raise MemoryError("Issue with c-routine, returned %i" % ret)
    peaks_out = list(zip(arr[out.astype(bool)], inds[out.astype(bool)]))
    return peaks_out


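# Hypothetical usage sketch (values illustrative), assuming peaks closer
# than trig_int samples to a larger-amplitude peak are removed: the peak at
# sample 10 is suppressed by the larger peak at sample 12.
#
#     peaks = np.array([1.0, 5.0, 2.0], dtype=np.float32)
#     index = np.array([10, 12, 300])
#     decluster(peaks, index, trig_int=100)  # -> [(5.0, 12), (2.0, 300)]

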
def time_multi_normxcorr(templates, stream, pads):
    """
    Compute cross-correlations in the time-domain using C routine.

    :param templates: 2D Array of templates
    :type templates: np.ndarray
    :param stream: 1D array of continuous data
    :type stream: np.ndarray
    :param pads: List of ints of pad lengths in the same order as templates
    :type pads: list

    :return: np.ndarray of cross-correlations
    :return: np.ndarray channels used
    """
    from future.utils import native_str

    used_chans = ~np.isnan(templates).any(axis=1)

    utilslib = _load_cdll('libutils')

    utilslib.multi_corr.argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int, ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                               flags=native_str('C_CONTIGUOUS'))]
    utilslib.multi_corr.restype = ctypes.c_int

    template_len = templates.shape[1]
    n_templates = templates.shape[0]
    image_len = stream.shape[0]
    ccc = np.ascontiguousarray(
        np.empty((image_len - template_len + 1) * n_templates), np.float32)
    t_array = np.ascontiguousarray(templates.flatten(), np.float32)
    utilslib.multi_corr(t_array, template_len, n_templates,
                        np.ascontiguousarray(stream, np.float32),
                        image_len, ccc)
    ccc[np.isnan(ccc)] = 0.0
    ccc = ccc.reshape((n_templates, image_len - template_len + 1))
    for i in range(len(pads)):
        ccc[i] = np.append(ccc[i], np.zeros(pads[i]))[pads[i]:]
    return ccc, used_chans


def decluster(peaks, index, trig_int):
    """
    Decluster peaks based on an enforced minimum separation.

    :type peaks: np.array
    :param peaks: array of peak values
    :type index: np.ndarray
    :param index: locations of peaks
    :type trig_int: int
    :param trig_int: Minimum trigger interval in samples

    :return: list of tuples of (value, sample)
    """
    from eqcorrscan.utils.libnames import _load_cdll
    import ctypes
    from future.utils import native_str
    from itertools import compress

    utilslib = _load_cdll('libutils')

    length = np.int32(len(peaks))
    utilslib.find_peaks.argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32, shape=(length,),
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.float32, shape=(length,),
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int, ctypes.c_float, ctypes.c_float,
        np.ctypeslib.ndpointer(dtype=np.uint32, shape=(length,),
                               flags=native_str('C_CONTIGUOUS'))]
    utilslib.find_peaks.restype = ctypes.c_int

    peaks_sort = sorted(zip(peaks, index),
                        key=lambda amplitude: abs(amplitude[0]),
                        reverse=True)
    arr, inds = zip(*peaks_sort)
    arr = np.ascontiguousarray(arr, dtype=np.float32)
    inds = np.array(inds, dtype=np.float32) / trig_int
    inds = np.ascontiguousarray(inds, dtype=np.float32)
    out = np.zeros(len(arr), dtype=np.uint32)

    ret = utilslib.find_peaks(arr, inds, length, 0, np.float32(1), out)
    if ret != 0:
        raise MemoryError("Issue with c-routine, returned %i" % ret)
    peaks_out = list(compress(peaks_sort, out))
    return peaks_out


def _multi_find_peaks_c(arrays, thresholds, threads):
    """
    Wrapper for multi-find peaks C-func.
    """
    utilslib = _load_cdll('libutils')

    length = arrays.shape[1]
    n = np.int32(arrays.shape[0])
    thresholds = np.ascontiguousarray(thresholds, np.float32)
    # Copy the data to avoid altering the user's arrays
    arr = np.ascontiguousarray(arrays.flatten(), np.float32)
    utilslib.multi_find_peaks.argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32, shape=(n * length, ),
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long, ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.float32, shape=(n, ),
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.uint32, shape=(n * length, ),
                               flags=native_str('C_CONTIGUOUS'))]
    utilslib.multi_find_peaks.restype = ctypes.c_int

    out = np.ascontiguousarray(np.zeros((n * length, ), dtype=np.uint32))
    ret = utilslib.multi_find_peaks(arr, ctypes.c_long(length), n,
                                    thresholds, threads, out)
    if ret != 0:
        raise MemoryError("Internal error")

    peaks = []
    peak_locations = []
    out = out.reshape(n, length)
    for i in range(n):
        peak_locs = np.nonzero(out[i])
        peaks.append(arrays[i][peak_locs])
        peak_locations.append(peak_locs[0])
    return peaks, peak_locations


def fftw_multi_normxcorr(template_array, stream_array, pad_array, seed_ids,
                         cores_inner, cores_outer):
    """
    Use a C loop rather than a Python loop - in some cases this will be fast.

    :type template_array: dict
    :param template_array:
    :type stream_array: dict
    :param stream_array:
    :type pad_array: dict
    :param pad_array:
    :type seed_ids: list
    :param seed_ids:

    :rtype: np.ndarray, list
    :return: 3D Array of cross-correlations and list of used channels.
    """
    utilslib = _load_cdll('libutils')

    utilslib.multi_normxcorr_fftw.argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long, ctypes.c_long, ctypes.c_long,
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long,
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long,
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int, ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS'))]
    utilslib.multi_normxcorr_fftw.restype = ctypes.c_int
    '''
    Arguments are:
        templates (stacked [ch_1-t_1, ch_1-t_2, ..., ch_2-t_1, ch_2-t_2, ...])
        number of templates
        template length
        number of channels
        image (stacked [ch_1, ch_2, ..., ch_n])
        image length
        cross-correlations (stacked as per image)
        fft-length
        used channels (stacked as per templates)
        pad array (stacked as per templates)
    '''
    # pre processing
    used_chans = []
    template_len = template_array[seed_ids[0]].shape[1]
    for seed_id in seed_ids:
        used_chans.append(~np.isnan(template_array[seed_id]).any(axis=1))
        template_array[seed_id] = (
            (template_array[seed_id] -
             template_array[seed_id].mean(axis=-1, keepdims=True)) /
            (template_array[seed_id].std(axis=-1, keepdims=True) *
             template_len))
        template_array[seed_id] = np.nan_to_num(template_array[seed_id])
    n_channels = len(seed_ids)
    n_templates = template_array[seed_ids[0]].shape[0]
    image_len = stream_array[seed_ids[0]].shape[0]
    fft_len = next_fast_len(template_len + image_len - 1)
    template_array = np.ascontiguousarray(
        [template_array[x] for x in seed_ids], dtype=np.float32)
    stream_array = np.ascontiguousarray(
        [stream_array[x] for x in seed_ids], dtype=np.float32)
    cccs = np.zeros((n_templates, image_len - template_len + 1), np.float32)
    used_chans_np = np.ascontiguousarray(used_chans, dtype=np.intc)
    pad_array_np = np.ascontiguousarray(
        [pad_array[seed_id] for seed_id in seed_ids], dtype=np.intc)
    variance_warnings = np.ascontiguousarray(
        np.zeros(n_channels), dtype=np.intc)

    # call C function
    ret = utilslib.multi_normxcorr_fftw(
        template_array, n_templates, template_len, n_channels, stream_array,
        image_len, cccs, fft_len, used_chans_np, pad_array_np, cores_outer,
        cores_inner, variance_warnings)
    if ret < 0:
        raise MemoryError("Memory allocation failed in correlation C-code")
    elif ret not in [0, 999]:
        print('Error in C code (possible normalisation error)')
        print('Maximum cccs %f at %s' %
              (cccs.max(), np.unravel_index(cccs.argmax(), cccs.shape)))
        print('Minimum cccs %f at %s' %
              (cccs.min(), np.unravel_index(cccs.argmin(), cccs.shape)))
        raise CorrelationError("Internal correlation error")
    elif ret == 999:
        warnings.warn("Some correlations not computed, are there "
                      "zeros in data? If not, consider increasing gain.")
    for i, variance_warning in enumerate(variance_warnings):
        if variance_warning and variance_warning > template_len:
            warnings.warn("Low variance found in {0} places for {1},"
                          " check result.".format(variance_warning,
                                                  seed_ids[i]))
    return cccs, used_chans


def fftw_normxcorr(templates, stream, pads, threaded=False, *args, **kwargs):
    """
    Normalised cross-correlation using the fftw library.

    Internally this function uses double precision numbers, which is
    definitely required for seismic data. Cross-correlations are computed as
    the inverse fft of the dot product of the ffts of the stream and the
    reversed, normalised, templates. The cross-correlation is then normalised
    using the running mean and standard deviation (not using the N-1
    correction) of the stream and the sums of the normalised templates.

    This python function wraps the C-library written by C. Chamberlain for
    this purpose.

    :param templates: 2D Array of templates
    :type templates: np.ndarray
    :param stream: 1D array of continuous data
    :type stream: np.ndarray
    :param pads: List of ints of pad lengths in the same order as templates
    :type pads: list
    :param threaded:
        Whether to use the threaded routine or not - note openMP and python
        multiprocessing don't seem to play nice for this.
    :type threaded: bool

    :return: np.ndarray of cross-correlations
    :return: np.ndarray channels used
    """
    utilslib = _load_cdll('libutils')

    argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long, ctypes.c_long,
        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long,
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long,
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS'))]
    restype = ctypes.c_int

    if threaded:
        func = utilslib.normxcorr_fftw_threaded
    else:
        func = utilslib.normxcorr_fftw
    func.argtypes = argtypes
    func.restype = restype

    # Generate a template mask
    used_chans = ~np.isnan(templates).any(axis=1)
    template_length = templates.shape[1]
    stream_length = len(stream)
    n_templates = templates.shape[0]
    fftshape = next_fast_len(template_length + stream_length - 1)

    # Normalize and flip the templates
    norm = ((templates - templates.mean(axis=-1, keepdims=True)) /
            (templates.std(axis=-1, keepdims=True) * template_length))
    norm = np.nan_to_num(norm)
    ccc = np.zeros((n_templates, stream_length - template_length + 1),
                   np.float32)
    used_chans_np = np.ascontiguousarray(used_chans, dtype=np.intc)
    pads_np = np.ascontiguousarray(pads, dtype=np.intc)
    variance_warning = np.ascontiguousarray([0], dtype=np.intc)

    ret = func(
        np.ascontiguousarray(norm.flatten(order='C'), np.float32),
        template_length, n_templates,
        np.ascontiguousarray(stream, np.float32), stream_length,
        np.ascontiguousarray(ccc, np.float32), fftshape,
        used_chans_np, pads_np, variance_warning)
    if ret < 0:
        raise MemoryError()
    elif ret not in [0, 999]:
        print('Error in C code (possible normalisation error)')
        print('Maximum ccc %f at %i' % (ccc.max(), ccc.argmax()))
        print('Minimum ccc %f at %i' % (ccc.min(), ccc.argmin()))
        raise CorrelationError("Internal correlation error")
    elif ret == 999:
        warnings.warn("Some correlations not computed, are there "
                      "zeros in data? If not, consider increasing gain.")
    if variance_warning[0] and variance_warning[0] > template_length:
        warnings.warn(
            "Low variance found in {0} positions, check result.".format(
                variance_warning[0]))
    return ccc, used_chans


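# A minimal pure-numpy reference for the frequency-domain normalised
# cross-correlation described in the docstring above: the correlation is
# the inverse fft of the product of the ffts of the stream and the
# reversed, normalised templates, then divided by the running (biased)
# standard deviation of the stream. A sketch for checking results on small,
# gap-free arrays; the helper name _numpy_normxcorr is hypothetical and
# this is not the C implementation (no padding, no fft chunking).
def _numpy_normxcorr(templates, stream):
    """Reference normalised cross-correlation (2D templates, 1D stream)."""
    template_length = templates.shape[1]
    steps = len(stream) - template_length + 1
    norm = ((templates - templates.mean(axis=-1, keepdims=True)) /
            (templates.std(axis=-1, keepdims=True) * template_length))
    # Running mean and standard deviation (no N-1 correction) of the stream
    csum = np.cumsum(np.insert(stream, 0, 0.0))
    csum_sq = np.cumsum(np.insert(stream ** 2, 0, 0.0))
    means = (csum[template_length:] -
             csum[:-template_length]) / template_length
    stds = np.sqrt(np.maximum(
        (csum_sq[template_length:] - csum_sq[:-template_length]) /
        template_length - means ** 2, 0.0))
    stds[stds == 0] = np.inf  # Flat windows correlate to zero, not NaN
    fftshape = template_length + len(stream) - 1
    stream_fft = np.fft.rfft(stream, fftshape)
    ccc = np.empty((templates.shape[0], steps))
    for i, template in enumerate(norm):
        # Each normalised template sums to zero, so correlating it with the
        # raw stream already removes the running mean.
        corr = np.fft.irfft(
            np.fft.rfft(template[::-1], fftshape) * stream_fft, fftshape)
        ccc[i] = corr[template_length - 1:
                      template_length - 1 + steps] / stds
    return ccc

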
def time_multi_normxcorr(templates, stream, pads, threaded=False,
                         *args, **kwargs):
    """
    Compute cross-correlations in the time-domain using C routine.

    :param templates: 2D Array of templates
    :type templates: np.ndarray
    :param stream: 1D array of continuous data
    :type stream: np.ndarray
    :param pads: List of ints of pad lengths in the same order as templates
    :type pads: list
    :param threaded: Whether to use the threaded routine or not
    :type threaded: bool

    :return: np.ndarray of cross-correlations
    :return: np.ndarray channels used
    """
    used_chans = ~np.isnan(templates).any(axis=1)

    utilslib = _load_cdll('libutils')

    argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int, ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                               flags=native_str('C_CONTIGUOUS'))]
    restype = ctypes.c_int

    if threaded:
        func = utilslib.multi_normxcorr_time_threaded
        argtypes.append(ctypes.c_int)
    else:
        func = utilslib.multi_normxcorr_time
    func.argtypes = argtypes
    func.restype = restype

    # Need to de-mean everything
    templates_means = templates.mean(axis=1).astype(np.float32)[:, np.newaxis]
    stream_mean = stream.mean().astype(np.float32)
    templates = templates.astype(np.float32) - templates_means
    stream = stream.astype(np.float32) - stream_mean
    template_len = templates.shape[1]
    n_templates = templates.shape[0]
    image_len = stream.shape[0]
    ccc = np.ascontiguousarray(
        np.empty((image_len - template_len + 1) * n_templates), np.float32)
    t_array = np.ascontiguousarray(templates.flatten(), np.float32)
    time_args = [t_array, template_len, n_templates,
                 np.ascontiguousarray(stream, np.float32), image_len, ccc]
    if threaded:
        time_args.append(kwargs.get('cores', cpu_count()))
    func(*time_args)
    ccc[np.isnan(ccc)] = 0.0
    ccc = ccc.reshape((n_templates, image_len - template_len + 1))
    for i in range(len(pads)):
        ccc[i] = np.append(ccc[i], np.zeros(pads[i]))[pads[i]:]
    templates += templates_means
    stream += stream_mean
    return ccc, used_chans


def fftw_multi_normxcorr(template_array, stream_array, pad_array, seed_ids,
                         cores_inner, stack=True, *args, **kwargs):
    """
    Use a C loop rather than a Python loop - in some cases this will be fast.

    :type template_array: dict
    :param template_array:
    :type stream_array: dict
    :param stream_array:
    :type pad_array: dict
    :param pad_array:
    :type seed_ids: list
    :param seed_ids:

    :rtype: np.ndarray, list
    :return: 3D Array of cross-correlations and list of used channels.
    """
    utilslib = _load_cdll('libutils')

    utilslib.multi_normxcorr_fftw.argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long, ctypes.c_long, ctypes.c_long,
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long,
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long,
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int]
    utilslib.multi_normxcorr_fftw.restype = ctypes.c_int
    '''
    Arguments are:
        templates (stacked [ch_1-t_1, ch_1-t_2, ..., ch_2-t_1, ch_2-t_2, ...])
        number of templates
        template length
        number of channels
        image (stacked [ch_1, ch_2, ..., ch_n])
        image length
        cross-correlations (stacked as per image)
        fft-length
        used channels (stacked as per templates)
        pad array (stacked as per templates)
        num thread inner
        variance warnings
        missed correlation warnings (usually due to gaps)
        stack option
    '''
    # pre processing
    used_chans = []
    template_len = template_array[seed_ids[0]].shape[1]
    for seed_id in seed_ids:
        used_chans.append(~np.isnan(template_array[seed_id]).any(axis=1))
        template_array[seed_id] = (
            (template_array[seed_id] -
             template_array[seed_id].mean(axis=-1, keepdims=True)) /
            (template_array[seed_id].std(axis=-1, keepdims=True) *
             template_len))
        template_array[seed_id] = np.nan_to_num(template_array[seed_id])
    n_channels = len(seed_ids)
    n_templates = template_array[seed_ids[0]].shape[0]
    image_len = stream_array[seed_ids[0]].shape[0]
    fft_len = kwargs.get("fft_len")
    if fft_len is None:
        # In testing, 2**13 consistently comes out fastest - setting to
        # default. https://github.com/eqcorrscan/EQcorrscan/pull/285
        fft_len = min(2 ** 13, next_fast_len(template_len + image_len - 1))
    if fft_len < template_len:
        Logger.warning(
            "FFT length of {0} is shorter than the template, setting to "
            "{1}".format(
                fft_len, next_fast_len(template_len + image_len - 1)))
        fft_len = next_fast_len(template_len + image_len - 1)
    template_array = np.ascontiguousarray(
        [template_array[x] for x in seed_ids], dtype=np.float32)
    multipliers = {}
    for x in seed_ids:
        # Check that stream is non-zero and above variance threshold
        if (not np.all(stream_array[x] == 0) and
                np.var(stream_array[x]) < 1e-8):
            # Apply gain
            stream_array[x] *= MULTIPLIER
            Logger.warning("Low variance found for {0}, applying gain "
                           "to stabilise correlations".format(x))
            multipliers.update({x: MULTIPLIER})
        else:
            multipliers.update({x: 1})
    stream_array = np.ascontiguousarray(
        [stream_array[x] for x in seed_ids], dtype=np.float32)
    ccc_length = image_len - template_len + 1
    assert ccc_length > 0, "Template must be shorter than stream"
    if stack:
        cccs = np.zeros((n_templates, ccc_length), np.float32)
    else:
        cccs = np.zeros((n_templates, n_channels, ccc_length),
                        dtype=np.float32)
    used_chans_np = np.ascontiguousarray(used_chans, dtype=np.intc)
    pad_array_np = np.ascontiguousarray(
        [pad_array[seed_id] for seed_id in seed_ids], dtype=np.intc)
    variance_warnings = np.ascontiguousarray(
        np.zeros(n_channels), dtype=np.intc)
    missed_correlations = np.ascontiguousarray(
        np.zeros(n_channels), dtype=np.intc)

    # call C function
    ret = utilslib.multi_normxcorr_fftw(
        template_array, n_templates, template_len, n_channels, stream_array,
        image_len, cccs, fft_len, used_chans_np, pad_array_np, cores_inner,
        variance_warnings, missed_correlations, int(stack))
    if ret < 0:
        raise MemoryError("Memory allocation failed in correlation C-code")
    elif ret > 0:
        Logger.critical('Error in C code (possible normalisation error)')
        Logger.critical(
            'Maximum cccs %f at %s' %
            (cccs.max(), np.unravel_index(cccs.argmax(), cccs.shape)))
        Logger.critical(
            'Minimum cccs %f at %s' %
            (cccs.min(), np.unravel_index(cccs.argmin(), cccs.shape)))
        Logger.critical('Recommend checking your data for spikes, clipping '
                        'or artefacts')
        raise CorrelationError("Internal correlation error")
    for i, missed_corr in enumerate(missed_correlations):
        if missed_corr:
            Logger.debug(
                "{0} correlations not computed on {1}, are there gaps in the "
                "data? If not, consider increasing gain".format(
                    missed_corr, seed_ids[i]))
    for i, variance_warning in enumerate(variance_warnings):
        if variance_warning and variance_warning > template_len:
            Logger.warning("Low variance found in {0} places for {1}, check "
                           "result.".format(variance_warning, seed_ids[i]))
    # Remove gain (divide out the multiplier applied above)
    for i, x in enumerate(seed_ids):
        stream_array[i] /= multipliers[x]
    return cccs, used_chans


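# Hypothetical usage sketch (shapes illustrative): with stack=False the
# per-channel correlograms are returned separately rather than summed
# across channels.
#
#     cccs, used = fftw_multi_normxcorr(
#         template_dict, stream_dict, pad_dict, seed_ids,
#         cores_inner=1, stack=False)
#     # cccs.shape == (n_templates, n_channels, image_len - template_len + 1)

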
def fftw_normxcorr(templates, stream, pads, threaded=False, *args, **kwargs):
    """
    Normalised cross-correlation using the fftw library.

    Internally this function uses double precision numbers, which is
    definitely required for seismic data. Cross-correlations are computed as
    the inverse fft of the dot product of the ffts of the stream and the
    reversed, normalised, templates. The cross-correlation is then normalised
    using the running mean and standard deviation (not using the N-1
    correction) of the stream and the sums of the normalised templates.

    This python function wraps the C-library written by C. Chamberlain for
    this purpose.

    :param templates: 2D Array of templates
    :type templates: np.ndarray
    :param stream: 1D array of continuous data
    :type stream: np.ndarray
    :param pads: List of ints of pad lengths in the same order as templates
    :type pads: list
    :param threaded:
        Whether to use the threaded routine or not - note openMP and python
        multiprocessing don't seem to play nice for this.
    :type threaded: bool

    :return: np.ndarray of cross-correlations
    :return: np.ndarray channels used
    """
    utilslib = _load_cdll('libutils')

    argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long, ctypes.c_long,
        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long,
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long,
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS'))]
    restype = ctypes.c_int

    if threaded:
        func = utilslib.normxcorr_fftw_threaded
    else:
        func = utilslib.normxcorr_fftw
    func.argtypes = argtypes
    func.restype = restype

    # Generate a template mask
    used_chans = ~np.isnan(templates).any(axis=1)
    template_length = templates.shape[1]
    stream_length = len(stream)
    n_templates = templates.shape[0]
    fftshape = kwargs.get("fft_len")
    if fftshape is None:
        # In testing, 2**13 consistently comes out fastest - setting to
        # default. https://github.com/eqcorrscan/EQcorrscan/pull/285
        fftshape = min(
            2 ** 13, next_fast_len(template_length + stream_length - 1))
    if fftshape < template_length:
        Logger.warning(
            "FFT length of {0} is shorter than the template, setting to "
            "{1}".format(
                fftshape,
                next_fast_len(template_length + stream_length - 1)))
        fftshape = next_fast_len(template_length + stream_length - 1)

    # Normalize and flip the templates
    norm = ((templates - templates.mean(axis=-1, keepdims=True)) /
            (templates.std(axis=-1, keepdims=True) * template_length))
    norm = np.nan_to_num(norm)
    ccc_length = stream_length - template_length + 1
    assert ccc_length > 0, "Template must be shorter than stream"
    ccc = np.zeros((n_templates, ccc_length), np.float32)
    used_chans_np = np.ascontiguousarray(used_chans, dtype=np.intc)
    pads_np = np.ascontiguousarray(pads, dtype=np.intc)
    variance_warning = np.ascontiguousarray([0], dtype=np.intc)
    missed_corr = np.ascontiguousarray([0], dtype=np.intc)

    # Check that stream is non-zero and above variance threshold
    if not np.all(stream == 0) and np.var(stream) < 1e-8:
        # Apply gain
        stream *= MULTIPLIER
        Logger.warning("Low variance found, applying gain "
                       "to stabilise correlations")
        multiplier = MULTIPLIER
    else:
        multiplier = 1

    ret = func(
        np.ascontiguousarray(norm.flatten(order='C'), np.float32),
        template_length, n_templates,
        np.ascontiguousarray(stream, np.float32), stream_length,
        np.ascontiguousarray(ccc, np.float32), fftshape,
        used_chans_np, pads_np, variance_warning, missed_corr)
    if ret < 0:
        raise MemoryError()
    elif ret > 0:
        Logger.critical('Error in C code (possible normalisation error)')
        Logger.critical('Maximum ccc %f at %i' % (ccc.max(), ccc.argmax()))
        Logger.critical('Minimum ccc %f at %i' % (ccc.min(), ccc.argmin()))
        Logger.critical('Recommend checking your data for spikes, clipping '
                        'or artefacts')
        raise CorrelationError("Internal correlation error")
    if missed_corr[0]:
        Logger.warning(
            "{0} correlations not computed, are there gaps in the "
            "data? If not, consider increasing gain".format(missed_corr[0]))
    if variance_warning[0] and variance_warning[0] > template_length:
        Logger.warning(
            "Low variance found in {0} positions, check result.".format(
                variance_warning[0]))
    # Remove variance correction
    stream /= multiplier
    return ccc, used_chans


def _multi_decluster(peaks, indices, trig_int, thresholds, cores):
    """
    Decluster peaks based on an enforced minimum separation.

    Only works when peaks and indices are all the same shape.

    :type peaks: list
    :param peaks: list of arrays of peak values
    :type indices: list
    :param indices: list of arrays of locations of peaks
    :type trig_int: int
    :param trig_int: Minimum trigger interval in samples
    :type thresholds: list
    :param thresholds: list of float of threshold values

    :return: list of lists of tuples of (value, sample)
    """
    utilslib = _load_cdll('libutils')

    lengths = np.array([peak.shape[0] for peak in peaks], dtype=int)
    trig_int = int(trig_int)
    n = np.int32(len(peaks))
    cores = min(cores, n)
    total_length = lengths.sum()

    max_indexes = [_indices.max() for _indices in indices]
    max_index = max(max_indexes)
    # Use the widest integer type needed by any value passed to C.
    max_var = max(trig_int, int(lengths.max()), int(max_index))
    if max_var == ctypes.c_long(max_var).value:
        long_type = ctypes.c_long
        func = utilslib.multi_decluster
    elif max_var == ctypes.c_longlong(max_var).value:
        long_type = ctypes.c_longlong
        func = utilslib.multi_decluster_ll
    else:
        # Note, could use numpy.gcd to try and find greatest common
        # divisor and make numbers smaller
        raise OverflowError("Maximum index larger than internal long long")

    func.argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32, shape=(total_length,),
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=long_type, shape=(total_length,),
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=long_type, shape=(n,),
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.float32, shape=(n,),
                               flags=native_str('C_CONTIGUOUS')),
        long_type,
        np.ctypeslib.ndpointer(dtype=np.uint32, shape=(total_length,),
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int]
    func.restype = ctypes.c_int

    peaks_sorted = np.empty(total_length, dtype=np.float32)
    # Use an integer array for indices to avoid float32 precision loss
    indices_sorted = np.empty(total_length, dtype=long_type)
    # TODO: When doing full decluster from match-filter, all lengths will be
    # TODO: the same - would be more efficient to use numpy sort on 2D matrix
    start_ind = 0
    end_ind = 0
    for _peaks, _indices, length in zip(peaks, indices, lengths):
        end_ind += length
        sorted_indices = np.abs(_peaks).argsort()
        peaks_sorted[start_ind: end_ind] = _peaks[sorted_indices[::-1]]
        indices_sorted[start_ind: end_ind] = _indices[sorted_indices[::-1]]
        start_ind += length

    peaks_sorted = np.ascontiguousarray(peaks_sorted, dtype=np.float32)
    indices_sorted = np.ascontiguousarray(indices_sorted, dtype=long_type)
    lengths = np.ascontiguousarray(lengths, dtype=long_type)
    thresholds = np.ascontiguousarray(thresholds, dtype=np.float32)
    out = np.zeros(total_length, dtype=np.uint32)

    ret = func(peaks_sorted, indices_sorted, lengths, np.int32(n),
               thresholds, long_type(trig_int + 1), out, np.int32(cores))
    if ret != 0:
        raise MemoryError("Issue with c-routine, returned %i" % ret)

    peaks_out = []
    slice_start = 0
    for length in lengths:
        slice_end = slice_start + length
        out_mask = out[slice_start: slice_end].astype(bool)
        declustered_peaks = peaks_sorted[slice_start: slice_end][out_mask]
        declustered_indices = indices_sorted[
            slice_start: slice_end][out_mask]
        peaks_out.append(list(zip(declustered_peaks, declustered_indices)))
        slice_start = slice_end
    return peaks_out


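# Hypothetical usage sketch (values illustrative): decluster several peak
# arrays in one call, with one threshold per array.
#
#     peaks_list = [np.array([1.0, 5.0], dtype=np.float32),
#                   np.array([3.0, 2.0], dtype=np.float32)]
#     index_list = [np.array([10, 500]), np.array([40, 4000])]
#     _multi_decluster(peaks_list, index_list, trig_int=100,
#                      thresholds=[0.5, 0.5], cores=2)

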
def fftw_multi_normxcorr(template_array, stream_array, pad_array, seed_ids):
    """
    Use a C loop rather than a Python loop - in some cases this will be fast.

    :type template_array: dict
    :param template_array:
    :type stream_array: dict
    :param stream_array:
    :type pad_array: dict
    :param pad_array:
    :type seed_ids: list
    :param seed_ids:

    :rtype: np.ndarray, list
    :return: 3D Array of cross-correlations and list of used channels.
    """
    utilslib = _load_cdll('libutils')

    utilslib.multi_normxcorr_fftw.argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int, ctypes.c_int, ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS'))]
    utilslib.multi_normxcorr_fftw.restype = ctypes.c_int
    '''
    Arguments are:
        templates (stacked [ch_1-t_1, ch_1-t_2, ..., ch_2-t_1, ch_2-t_2, ...])
        number of templates
        template length
        number of channels
        image (stacked [ch_1, ch_2, ..., ch_n])
        image length
        cross-correlations (stacked as per image)
        fft-length
        used channels (stacked as per templates)
        pad array (stacked as per templates)
    '''
    # pre processing
    used_chans = []
    template_len = template_array[seed_ids[0]].shape[1]
    for seed_id in seed_ids:
        used_chans.append(~np.isnan(template_array[seed_id]).any(axis=1))
        template_array[seed_id] = (
            (template_array[seed_id] -
             template_array[seed_id].mean(axis=-1, keepdims=True)) /
            (template_array[seed_id].std(axis=-1, keepdims=True) *
             template_len))
        template_array[seed_id] = np.nan_to_num(template_array[seed_id])
    n_channels = len(seed_ids)
    n_templates = template_array[seed_ids[0]].shape[0]
    image_len = stream_array[seed_ids[0]].shape[0]
    fft_len = next_fast_len(template_len + image_len - 1)
    template_array = np.ascontiguousarray(
        [template_array[x] for x in seed_ids], dtype=np.float32)
    stream_array = np.ascontiguousarray(
        [stream_array[x] for x in seed_ids], dtype=np.float32)
    cccs = np.zeros((n_templates, image_len - template_len + 1), np.float32)
    used_chans_np = np.ascontiguousarray(used_chans, dtype=np.intc)
    pad_array_np = np.ascontiguousarray(
        [pad_array[seed_id] for seed_id in seed_ids], dtype=np.intc)

    # call C function
    ret = utilslib.multi_normxcorr_fftw(
        template_array, n_templates, template_len, n_channels, stream_array,
        image_len, cccs, fft_len, used_chans_np, pad_array_np)
    if ret < 0:
        raise MemoryError()
    elif ret > 0:
        print('Error in C code (possible normalisation error)')
        print(cccs.max())
        print(cccs.min())
        raise MemoryError()
    return cccs, used_chans


def fftw_normxcorr(templates, stream, pads, threaded=False, *args, **kwargs):
    """
    Normalised cross-correlation using the fftw library.

    Internally this function uses double precision numbers, which is
    definitely required for seismic data. Cross-correlations are computed as
    the inverse fft of the dot product of the ffts of the stream and the
    reversed, normalised, templates. The cross-correlation is then normalised
    using the running mean and standard deviation (not using the N-1
    correction) of the stream and the sums of the normalised templates.

    This python function wraps the C-library written by C. Chamberlain for
    this purpose.

    :param templates: 2D Array of templates
    :type templates: np.ndarray
    :param stream: 1D array of continuous data
    :type stream: np.ndarray
    :param pads: List of ints of pad lengths in the same order as templates
    :type pads: list
    :param threaded:
        Whether to use the threaded routine or not - note openMP and python
        multiprocessing don't seem to play nice for this.
    :type threaded: bool

    :return: np.ndarray of cross-correlations
    :return: np.ndarray channels used
    """
    utilslib = _load_cdll('libutils')

    argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int, ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.intc,
                               flags=native_str('C_CONTIGUOUS'))]
    restype = ctypes.c_int

    if threaded:
        func = utilslib.normxcorr_fftw_threaded
    else:
        func = utilslib.normxcorr_fftw
    func.argtypes = argtypes
    func.restype = restype

    # Generate a template mask
    used_chans = ~np.isnan(templates).any(axis=1)
    template_length = templates.shape[1]
    stream_length = len(stream)
    n_templates = templates.shape[0]
    fftshape = next_fast_len(template_length + stream_length - 1)

    # Normalize and flip the templates
    norm = ((templates - templates.mean(axis=-1, keepdims=True)) /
            (templates.std(axis=-1, keepdims=True) * template_length))
    norm = np.nan_to_num(norm)
    ccc = np.zeros((n_templates, stream_length - template_length + 1),
                   np.float32)
    used_chans_np = np.ascontiguousarray(used_chans, dtype=np.intc)
    pads_np = np.ascontiguousarray(pads, dtype=np.intc)

    ret = func(
        np.ascontiguousarray(norm.flatten(order='C'), np.float32),
        template_length, n_templates,
        np.ascontiguousarray(stream, np.float32), stream_length,
        np.ascontiguousarray(ccc, np.float32), fftshape,
        used_chans_np, pads_np)
    if ret != 0:
        print(ret)
        raise MemoryError()
    return ccc, used_chans


def remove_unclustered(catalog, distance_cutoff, num_threads=None):
    """
    Remove events in catalog which do not have any other nearby events.

    :type catalog: obspy.core.event.Catalog
    :param catalog: Catalog for which to compute the distance matrix
    :type distance_cutoff: float
    :param distance_cutoff: Cutoff for considering events unclustered in km

    :returns: catalog
    :rtype: :class:`obspy.core.event.Catalog`
    """
    import ctypes
    from eqcorrscan.utils.libnames import _load_cdll
    from future.utils import native_str
    from math import radians

    utilslib = _load_cdll('libutils')

    utilslib.remove_unclustered.argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.float32,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_long,
        np.ctypeslib.ndpointer(dtype=np.uint8,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_float, ctypes.c_int]
    utilslib.remove_unclustered.restype = ctypes.c_int

    # Initialize the output mask
    mask = np.ascontiguousarray(np.zeros(len(catalog), dtype=np.uint8))
    latitudes, longitudes, depths = (
        np.empty(len(catalog), dtype=np.float32),
        np.empty(len(catalog), dtype=np.float32),
        np.empty(len(catalog), dtype=np.float32))
    for i, event in enumerate(catalog):
        origin = event.preferred_origin() or event.origins[0]
        latitudes[i] = radians(origin.latitude)
        longitudes[i] = radians(origin.longitude)
        depths[i] = origin.depth / 1000
    depths = np.ascontiguousarray(depths, dtype=np.float32)
    latitudes = np.ascontiguousarray(latitudes, dtype=np.float32)
    longitudes = np.ascontiguousarray(longitudes, dtype=np.float32)

    if num_threads is None:
        # Testing showed that 400 events per thread was best on the i7.
        num_threads = int(min(cpu_count(), len(catalog) // 400))
    if num_threads == 0:
        num_threads = 1

    ret = utilslib.remove_unclustered(
        latitudes, longitudes, depths, len(catalog), mask, distance_cutoff,
        num_threads)
    if ret != 0:  # pragma: no cover
        raise Exception("Internal error while computing distance matrix")

    _events = []
    for i, event in enumerate(catalog.events):
        if mask[i]:
            _events.append(event)
    catalog.events = _events
    return catalog


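# Hypothetical usage sketch: drop events with no neighbour within 20 km.
# Note the input catalog is modified in place as well as returned.
#
#     from obspy import read_events
#     catalog = remove_unclustered(read_events(), distance_cutoff=20.0)

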
def decluster_distance_time(peaks, index, trig_int, catalog,
                            hypocentral_separation, threshold=0):
    """
    Decluster based on time between peaks, and distance between events.

    Peaks, index and catalog must all be sorted the same way, e.g. peak[i]
    corresponds to index[i] and catalog[i]. Peaks that are within the time
    threshold of one-another, but correspond to events separated by more
    than the hypocentral_separation threshold will not be removed.

    :type peaks: np.array
    :param peaks: array of peak values
    :type index: np.ndarray
    :param index: locations of peaks
    :type trig_int: int
    :param trig_int: Minimum trigger interval in samples
    :type catalog: obspy.core.event.Catalog
    :param catalog:
        Catalog of events with origins to use to measure inter-event
        distances
    :type hypocentral_separation: float
    :param hypocentral_separation:
        Maximum inter-event distance to decluster over in km
    :type threshold: float
    :param threshold: Minimum absolute peak value to retain it

    :return: list of tuples of (value, sample)
    """
    utilslib = _load_cdll('libutils')

    length = peaks.shape[0]
    trig_int = int(trig_int)
    # Use the widest integer type needed by any value passed to C.
    max_var = max(int(index.max()), trig_int)
    if max_var == ctypes.c_long(max_var).value:
        long_type = ctypes.c_long
        func = utilslib.decluster_dist_time
    elif max_var == ctypes.c_longlong(max_var).value:
        long_type = ctypes.c_longlong
        func = utilslib.decluster_dist_time_ll
    else:
        raise OverflowError("Maximum index larger than internal long long")

    func.argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32, shape=(length, ),
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=long_type, shape=(length, ),
                               flags=native_str('C_CONTIGUOUS')),
        np.ctypeslib.ndpointer(dtype=np.float32, shape=(length * length, ),
                               flags=native_str('C_CONTIGUOUS')),
        long_type, ctypes.c_float, long_type, ctypes.c_float,
        np.ctypeslib.ndpointer(dtype=np.uint32, shape=(length, ),
                               flags=native_str('C_CONTIGUOUS'))]
    func.restype = ctypes.c_int

    # Sort everything in the same way.
    sorted_inds = np.abs(peaks).argsort()
    arr = peaks[sorted_inds[::-1]]
    inds = index[sorted_inds[::-1]]
    sorted_events = [catalog[i] for i in sorted_inds[::-1]]
    distance_matrix = dist_mat_km(catalog=sorted_events)

    arr = np.ascontiguousarray(arr, dtype=np.float32)
    inds = np.ascontiguousarray(inds, dtype=long_type)
    distance_matrix = np.ascontiguousarray(
        distance_matrix.flatten(order="C"), dtype=np.float32)
    out = np.zeros(len(arr), dtype=np.uint32)

    ret = func(arr, inds, distance_matrix, long_type(length),
               np.float32(threshold), long_type(trig_int),
               hypocentral_separation, out)
    if ret != 0:
        raise MemoryError("Issue with c-routine, returned %i" % ret)
    peaks_out = list(zip(arr[out.astype(bool)], inds[out.astype(bool)]))
    return peaks_out


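# Hypothetical usage sketch (all names illustrative, assuming peaks, index
# and catalog are sorted consistently as the docstring requires): peaks
# within trig_int samples of a larger peak survive when their events are
# more than hypocentral_separation km apart.
#
#     kept = decluster_distance_time(
#         peaks, index, trig_int=200, catalog=catalog,
#         hypocentral_separation=30.0, threshold=0.2)

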
def fftw_xcorr(templates, stream, pads):
    """
    Normalised cross-correlation using the fftw library.

    Internally this function uses double precision numbers, which is
    definitely required for seismic data. Cross-correlations are computed as
    the inverse fft of the dot product of the ffts of the stream and the
    reversed, normalised, templates. The cross-correlation is then normalised
    using the running mean and standard deviation (not using the N-1
    correction) of the stream and the sums of the normalised templates.

    This python function wraps the C-library written by C. Chamberlain for
    this purpose.

    :param templates: 2D Array of templates
    :type templates: np.ndarray
    :param stream: 1D array of continuous data
    :type stream: np.ndarray
    :param pads: List of ints of pad lengths in the same order as templates
    :type pads: list

    :return: np.ndarray of cross-correlations
    :return: np.ndarray channels used
    """
    from future.utils import native_str

    utilslib = _load_cdll('libutils')

    utilslib.normxcorr_fftw_1d.argtypes = [
        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int,
        np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
                               flags=native_str('C_CONTIGUOUS')),
        ctypes.c_int]
    utilslib.normxcorr_fftw_1d.restype = ctypes.c_int

    # Generate a template mask
    used_chans = ~np.isnan(templates).any(axis=1)
    template_length = templates.shape[1]
    stream_length = len(stream)
    n_templates = templates.shape[0]
    fftshape = next_fast_len(template_length + stream_length - 1)

    # Normalize and flip the templates
    norm = ((templates - templates.mean(axis=-1, keepdims=True)) /
            (templates.std(axis=-1, keepdims=True) * template_length))
    ccc = np.empty((n_templates, stream_length - template_length + 1),
                   np.float32)
    for i in range(n_templates):
        if np.all(np.isnan(norm[i])):
            ccc[i] = np.zeros(stream_length - template_length + 1)
        else:
            ret = utilslib.normxcorr_fftw_1d(
                np.ascontiguousarray(norm[i], np.float32), template_length,
                np.ascontiguousarray(stream, np.float32), stream_length,
                np.ascontiguousarray(ccc[i], np.float32), fftshape)
            if ret != 0:
                raise MemoryError()
    ccc = ccc.reshape((n_templates, stream_length - template_length + 1))
    ccc[np.isnan(ccc)] = 0.0
    if np.any(np.abs(ccc) > 1.01):
        print('Normalisation error in C code')
        print(ccc.max())
        print(ccc.min())
        raise MemoryError()
    ccc[ccc > 1.0] = 1.0
    ccc[ccc < -1.0] = -1.0
    for i in range(len(pads)):
        ccc[i] = np.append(ccc[i], np.zeros(pads[i]))[pads[i]:]
    return ccc, used_chans