def bandpass(self, data):
    # raise for some bad scenarios
    if self.high - 1.0 > -1e-6:
        msg = ("Selected high corner frequency ({}) of bandpass is at or "
               "above Nyquist ({}). Applying a high-pass instead.").format(
            self.freqmax, self.fe)
        logger.warning(msg)
        # warnings.warn(msg)
        return highpass(data, freq=self.freqmin, df=self.sampling_rate,
                        corners=self.corners, zerophase=self.zerophase)
    if self.low > 1:
        msg = "Selected low corner frequency is above Nyquist."
        raise ValueError(msg)
    if self.zi is None:
        z, p, k = iirfilter(self.corners, [self.low, self.high],
                            btype='bandpass', ftype='butter', output='zpk')
        self.sos = zpk2sos(z, p, k)
        self.zi = sosfilt_zi(self.sos)
    data, self.zi = sosfilt(self.sos, data, zi=self.zi)
    return data

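A hedged usage sketch of the stateful block-wise bandpass above. The
surrounding class is not shown in this snippet, so the minimal wrapper
below (the class name BlockBandpass and the normalisation of the corner
frequencies) is an assumption reconstructed from the attribute names the
method uses; only the happy path is exercised.

import numpy as np
from scipy.signal import iirfilter, zpk2sos, sosfilt, sosfilt_zi


class BlockBandpass:
    def __init__(self, freqmin, freqmax, sampling_rate, corners=4):
        fe = 0.5 * sampling_rate      # Nyquist frequency
        self.low = freqmin / fe       # corners normalised to Nyquist
        self.high = freqmax / fe
        self.corners = corners
        self.zi = None                # filter state, built on first call

    def bandpass(self, data):
        if self.zi is None:
            z, p, k = iirfilter(self.corners, [self.low, self.high],
                                btype='bandpass', ftype='butter',
                                output='zpk')
            self.sos = zpk2sos(z, p, k)
            self.zi = sosfilt_zi(self.sos)
        data, self.zi = sosfilt(self.sos, data, zi=self.zi)
        return data


# feeding two consecutive chunks matches filtering the whole array with the
# same initial state, because self.zi carries state across block boundaries
bp = BlockBandpass(2.0, 8.0, sampling_rate=100.0)
x = np.random.randn(2000)
y = np.concatenate([bp.bandpass(x[:1000]), bp.bandpass(x[1000:])])
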
def test_highpass_vs_pitsa(self):
    """
    Test Butterworth highpass filter against Butterworth highpass filter
    of PITSA. Note that the corners value is twice the value of the filter
    sections in PITSA. The rms of the difference between ObsPy and PITSA
    tends to get bigger with higher order filtering.
    """
    # load test file
    filename = os.path.join(self.path, 'rjob_20051006.gz')
    with gzip.open(filename) as f:
        data = np.loadtxt(f)
    # parameters for the test
    samp_rate = 200.0
    freq = 10
    corners = 4
    # filter trace
    datcorr = highpass(data, freq, df=samp_rate, corners=corners)
    # load pitsa file
    filename = os.path.join(self.path, 'rjob_20051006_highpass.gz')
    with gzip.open(filename) as f:
        data_pitsa = np.loadtxt(f)
    # calculate normalized rms
    rms = np.sqrt(np.sum((datcorr - data_pitsa) ** 2) /
                  np.sum(data_pitsa ** 2))
    self.assertTrue(rms < 1.0e-05)

def extract_feature_vector(X):
    try:
        # preprocess data
        X = savgol_filter(X, 3, 2)
        X = highpass(X, 3, 50)
        X = min_max_scaler.transform(X)
        # extract time domain features
        X_mean = np.mean(X, axis=0)
        X_var = np.var(X, axis=0)
        X_max = np.max(X, axis=0)
        X_min = np.min(X, axis=0)
        X_off = np.subtract(X_max, X_min)
        X_mad = robust.mad(X, axis=0)
        # extract frequency domain features (absolute value of the
        # complex spectrum)
        X_fft_abs = np.abs(fft(X))
        X_fft_mean = np.mean(X_fft_abs, axis=0)
        X_fft_var = np.var(X_fft_abs, axis=0)
        X_fft_max = np.max(X_fft_abs, axis=0)
        X_fft_min = np.min(X_fft_abs, axis=0)
        # X_psd = []
        # X_peakF = []
        # obtain feature vector by appending all vectors above as one
        # d-dimensional feature vector
        X = np.append(X_mean, [X_var, X_max, X_min, X_off, X_mad,
                               X_fft_mean, X_fft_var, X_fft_max, X_fft_min])
        return standard_scaler.transform([X])
    except Exception:
        traceback.print_exc()
        print("Error in extracting features!")

def test_highpass_zphsh_vs_pitsa(self):
    """
    Test Butterworth zero-phase highpass filter against Butterworth
    zero-phase highpass filter of PITSA. Note that the corners value is
    twice the value of the filter sections in PITSA. The rms of the
    difference between ObsPy and PITSA tends to get bigger with higher
    order filtering.

    Note: The zero-phase filters deviate from PITSA's zero-phase filters
    at the end of the trace! The rms for the test is calculated omitting
    the last 200 samples, as this part of the trace is assumed to
    generally be of low interest/importance.
    """
    # load test file
    filename = os.path.join(self.path, 'rjob_20051006.gz')
    with gzip.open(filename) as f:
        data = np.loadtxt(f)
    # parameters for the test
    samp_rate = 200.0
    freq = 10
    corners = 2
    # filter trace
    datcorr = highpass(data, freq, df=samp_rate, corners=corners,
                       zerophase=True)
    # load pitsa file
    filename = os.path.join(self.path, 'rjob_20051006_highpassZPHSH.gz')
    with gzip.open(filename) as f:
        data_pitsa = np.loadtxt(f)
    # calculate normalized rms
    rms = np.sqrt(np.sum((datcorr[:-200] - data_pitsa[:-200]) ** 2) /
                  np.sum(data_pitsa[:-200] ** 2))
    self.assertTrue(rms < 1.0e-05)

def fillRMS(strSta, dUTC):
    rmsData['rsam_estacion'] = strSta[0].stats['station']
    rmsData['rsam_canal'] = strSta[0].stats['channel']
    rmsData['rsam_fecha_proceso'] = str(dUTC.datetime)
    rmsData['rms'] = getRMS(strSta[0].data)
    dataFilt1 = filter.bandpass(strSta[0].data, 0.05, 0.125,
                                strSta[0].stats['sampling_rate'])
    rmsData['rsam_banda1'] = getRMS(dataFilt1)
    dataFilt2 = filter.bandpass(strSta[0].data, 2, 8,
                                strSta[0].stats['sampling_rate'])
    rmsData['rsam_banda2'] = getRMS(dataFilt2)
    dataFilt3 = filter.bandpass(strSta[0].data, 0.25, 2,
                                strSta[0].stats['sampling_rate'])
    rmsData['rsam_banda3'] = getRMS(dataFilt3)
    dataFilt4 = filter.highpass(strSta[0].data, 10.0,
                                strSta[0].stats['sampling_rate'],
                                corners=1, zerophase=True)
    rmsData['rsam_banda4'] = getRMS(dataFilt4)

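The snippet above depends on a getRMS helper that is not included here. A
minimal sketch of what it presumably computes, the root-mean-square
amplitude of a trace; the name and signature are taken from the calls
above, the body is an assumption:

import numpy as np


def getRMS(data):
    # root-mean-square of the samples
    data = np.asarray(data, dtype=float)
    return np.sqrt(np.mean(data ** 2))
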
def predictDanceMove(self, X):
    # normalize values
    X = self.scaler.transform(X)
    # preprocess data
    X = savgol_filter(X, 3, 2)
    X = highpass(X, 3, 50)
    X = self.scaler.transform(X)
    # extract time domain features
    X_mean = np.mean(X, axis=0)
    X_median = np.median(X, axis=0)
    X_var = np.var(X, axis=0)
    X_max = np.max(X, axis=0)
    X_min = np.min(X, axis=0)
    X_off = np.subtract(X_max, X_min)
    X_mad = robust.mad(X, axis=0)
    # extract frequency domain features (absolute value of the complex
    # spectrum)
    X_fft_abs = np.abs(fft(X))
    X_fft_mean = np.mean(X_fft_abs, axis=0)
    X_fft_var = np.var(X_fft_abs, axis=0)
    X_fft_max = np.max(X_fft_abs, axis=0)
    X_fft_min = np.min(X_fft_abs, axis=0)
    X_entr = entropy(np.abs(np.fft.rfft(X, axis=0))[1:], base=2)
    # Append all vectors above as one d-dimensional feature vector
    feature_vector = np.append(X_off, [
        X_mean, X_median, X_var, X_mad, X_entr, X_min, X_max,
        X_fft_mean, X_fft_var, X_fft_max, X_fft_min
    ])
    # Predict using pretrained model (predict expects a 2-D array of
    # samples, hence the wrapping list)
    return self.model.predict([feature_vector])[0]

def test_highpassVsPitsa(self):
    """
    Test Butterworth highpass filter against Butterworth highpass filter
    of PITSA. Note that the corners value is twice the value of the filter
    sections in PITSA. The rms of the difference between ObsPy and PITSA
    tends to get bigger with higher order filtering.
    """
    # load test file
    filename = os.path.join(self.path, 'rjob_20051006.gz')
    with gzip.open(filename) as f:
        data = np.loadtxt(f)
    # parameters for the test
    samp_rate = 200.0
    freq = 10
    corners = 4
    # filter trace
    datcorr = highpass(data, freq, df=samp_rate, corners=corners)
    # load pitsa file
    filename = os.path.join(self.path, 'rjob_20051006_highpass.gz')
    with gzip.open(filename) as f:
        data_pitsa = np.loadtxt(f)
    # calculate normalized rms
    rms = np.sqrt(np.sum((datcorr - data_pitsa) ** 2) /
                  np.sum(data_pitsa ** 2))
    self.assertTrue(rms < 1.0e-05)

def test_highpassZPHSHVsPitsa(self):
    """
    Test Butterworth zero-phase highpass filter against Butterworth
    zero-phase highpass filter of PITSA. Note that the corners value is
    twice the value of the filter sections in PITSA. The rms of the
    difference between ObsPy and PITSA tends to get bigger with higher
    order filtering.

    Note: The zero-phase filters deviate from PITSA's zero-phase filters
    at the end of the trace! The rms for the test is calculated omitting
    the last 200 samples, as this part of the trace is assumed to
    generally be of low interest/importance.
    """
    # load test file
    filename = os.path.join(self.path, 'rjob_20051006.gz')
    with gzip.open(filename) as f:
        data = np.loadtxt(f)
    # parameters for the test
    samp_rate = 200.0
    freq = 10
    corners = 2
    # filter trace
    datcorr = highpass(data, freq, df=samp_rate, corners=corners,
                       zerophase=True)
    # load pitsa file
    filename = os.path.join(self.path, 'rjob_20051006_highpassZPHSH.gz')
    with gzip.open(filename) as f:
        data_pitsa = np.loadtxt(f)
    # calculate normalized rms
    rms = np.sqrt(np.sum((datcorr[:-200] - data_pitsa[:-200]) ** 2) /
                  np.sum(data_pitsa[:-200] ** 2))
    self.assertTrue(rms < 1.0e-05)

def filtering(st, ty, args):
    hip = args.highpass
    lop = args.lowpass
    bdp = args.bandpass

    if hip != "0":
        elements = hip.split()
        cors = int(elements[0])
        freq = eval(elements[1])
        for i in range(len(st)):
            st[i].data = highpass(st[i].data, freq,
                                  df=st[i].stats.sampling_rate,
                                  corners=cors, zerophase=args.zeroph)

    if lop != "0":
        elements = lop.split()
        cors = int(elements[0])
        freq = eval(elements[1])
        for i in range(len(st)):
            st[i].data = lowpass(st[i].data, freq,
                                 df=st[i].stats.sampling_rate,
                                 corners=cors, zerophase=args.zeroph)

    if bdp != "0":
        elements = bdp.split()
        cors = int(elements[0])
        freq_min = eval(elements[1])
        freq_max = eval(elements[2])
        for i in range(len(st)):
            st[i].data = bandpass(st[i].data, freq_min, freq_max,
                                  df=st[i].stats.sampling_rate,
                                  corners=cors, zerophase=args.zeroph)

    return st

def get_all_segments(raw_data, move_class, scaler):
    # preprocess data
    raw_data = savgol_filter(raw_data, 3, 2)
    raw_data = highpass(raw_data, 3, 50)
    raw_data = scaler.transform(raw_data)
    # extract overlapping segments of SEGMENT_SIZE samples
    limit = (len(raw_data) // SEGMENT_SIZE) * SEGMENT_SIZE
    segments = []
    for i in range(0, limit, int(SEGMENT_SIZE * (1 - OVERLAP))):
        segment = raw_data[i:(i + SEGMENT_SIZE)]
        segments.append(segment)
    return segments

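A short sketch of the windowing arithmetic used above, with assumed values
SEGMENT_SIZE = 8 and OVERLAP = 0.5 (the constants are defined elsewhere in
that project): consecutive windows advance by SEGMENT_SIZE * (1 - OVERLAP)
samples.

import numpy as np

SEGMENT_SIZE = 8
OVERLAP = 0.5

data = np.arange(20.0)
step = int(SEGMENT_SIZE * (1 - OVERLAP))             # 4 samples
limit = (len(data) // SEGMENT_SIZE) * SEGMENT_SIZE   # 16
starts = list(range(0, limit, step))                 # [0, 4, 8, 12]
# the window starting at 12 still spans 8 samples (12..19); no window
# begins in the truncated tail beyond `limit`
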
def _filter_traces(self):
    if self.lpcut.get() > self.sampling_rate.get():
        tkmessage.showerror(
            "Error", "Lowpass cutoff frequency greater than sampling rate.")
    elif self.hpcut.get() > self.sampling_rate.get():
        tkmessage.showerror(
            "Error", "Highpass cutoff frequency greater than sampling rate.")
    else:
        for k, tr in enumerate(self._traces):
            # apply the filters cumulatively so that both checkboxes can
            # be active at once
            filtered = tr
            if self.lowpass.get():
                filtered = lowpass(filtered, self.lpcut.get(),
                                   self.sampling_rate.get())
            if self.highpass.get():
                filtered = highpass(filtered, self.hpcut.get(),
                                    self.sampling_rate.get())
            self._traces[k, :] = filtered

def extract_feature_vector(X):
    # Default: 128 sets per segment with 50% overlap; currently, 8 segments
    # per set is used due to insufficient data
    SEGMENT_SIZE = 8
    OVERLAP = 0.5
    # preprocess data
    X = savgol_filter(X, 3, 2)
    X = highpass(X, 3, 50)
    X = min_max_scaler.transform(X)
    # extract time domain features
    X_mean = np.mean(X, axis=0)
    X_var = np.var(X, axis=0)
    X_max = np.max(X, axis=0)
    X_min = np.min(X, axis=0)
    X_off = np.subtract(X_max, X_min)
    X_mad = robust.mad(X, axis=0)
    # frequency domain features (placeholders, not yet implemented)
    X_psd = []
    X_peakF = []
    # obtain feature vector by appending all vectors above as one
    # d-dimensional feature vector
    X = np.append(X_mean, [X_var, X_max, X_min, X_off, X_mad])
    return standard_scaler.transform([X])

def highpass(self, freq, corners=4, zerophase=False, traces=None):
    """
    Butterworth-Highpass Filter of the data.

    Filter data removing data below certain frequency ``freq`` using
    ``corners`` corners.

    :param freq: Filter corner frequency in Hz.
    :param corners: Filter corners. Note: This is twice the value of
        PITSA's filter sections.
    :param zerophase: If True, apply filter once forwards and once
        backwards. This results in twice the number of corners but zero
        phase shift in the resulting filtered trace.
    :param traces: List of ``SEGYTrace`` objects with data to operate on.
        Default is to operate on all traces.
    """
    if not traces:
        traces = self.traces
    for tr in traces:
        df = 1.0 / (tr.header.sample_interval_in_ms_for_this_trace /
                    1.0e6)
        tr.data = filter.highpass(tr.data, freq, df, corners=corners,
                                  zerophase=zerophase)

def obspy_highpass(signal, df, freq=3.6, corners=4):
    """
    Butterworth-Highpass Filter, removing data below certain frequency

    args:
        signal (np.array): ecg signal
        df (int): sampling frequency of signal
        freq (int): cut-off frequency
        corners (int): number of corners used for filtering

    returns:
        filtered signal (np.array)
    """
    # Prevent high pass artifacts by moving the signal to start at around 0
    signal -= signal[:50].mean()
    ret = np.zeros_like(signal)
    for i in range(signal.shape[-1]):
        ret[:, i] = highpass(data=signal[:, i], freq=freq, df=df,
                             corners=corners)
    return ret

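A usage sketch for obspy_highpass on a synthetic two-channel record; the
500 Hz sampling rate and the test frequencies are illustrative
assumptions. Note that the baseline correction inside the helper modifies
the input array in place, hence the copy.

import numpy as np

fs = 500
t = np.arange(0, 10, 1.0 / fs)
sig = np.column_stack([np.sin(2 * np.pi * 1.0 * t),     # 1 Hz drift
                       np.sin(2 * np.pi * 20.0 * t)])   # 20 Hz component
filtered = obspy_highpass(sig.copy(), df=fs, freq=3.6)
# the 1 Hz column is strongly attenuated, the 20 Hz column passes
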
def extract_feature_vector(X):
    try:
        # preprocess data
        X = savgol_filter(X, 3, 2)
        X = highpass(X, 3, 50)
        X = min_max_scaler.transform(X)
        # # extract acceleration and angular velocity
        # X_accA = math.sqrt(sum(map(lambda x: x * x, np.mean(X[:, 0:3], axis=0))))
        # X_accB = math.sqrt(sum(map(lambda x: x * x, np.mean(X[:, 3:6], axis=0))))
        # X_gyro = math.sqrt(sum(map(lambda x: x * x, np.mean(X[:, 6:9], axis=0))))
        # X_mag = np.asarray([X_accA, X_accB, X_gyro])
        # extract time domain features
        X_mean = np.mean(X, axis=0)
        X_median = np.median(X, axis=0)
        # X_var = np.var(X, axis=0)
        X_max = np.max(X, axis=0)
        X_min = np.min(X, axis=0)
        X_off = np.subtract(X_max, X_min)
        X_mad = robust.mad(X, axis=0)
        # # extract frequency domain features (absolute value of the
        # # complex spectrum)
        # X_fft_abs = np.abs(fft(X))
        # X_fft_mean = np.mean(X_fft_abs, axis=0)
        # X_fft_var = np.var(X_fft_abs, axis=0)
        # X_fft_max = np.max(X_fft_abs, axis=0)
        # X_fft_min = np.min(X_fft_abs, axis=0)
        # X_entr = entropy(np.abs(np.fft.rfft(X, axis=0))[1:], base=2)
        # return feature vector by appending all vectors above as one
        # d-dimensional feature vector
        X = np.append(X_mean, [X_median, X_off, X_mad])
        return standard_scaler.transform([X])
    except Exception:
        traceback.print_exc()
        print("Error in extracting features!")

def make_stf(dt=0.10, nt=5000, fmin=1.0 / 100.0, fmax=1.0 / 8.0,
             filename='../INPUT/stf_new', plot=True):
    """
    Generate a source time function for ses3d by applying a bandpass
    filter to a Heaviside function.

    make_stf(dt=0.13, nt=4000, fmin=1.0/100.0, fmax=1.0/8.0,
             filename='../INPUT/stf_new', plot=True)

    dt: Length of the time step. Must equal dt in the event_* file.
    nt: Number of time steps. Must be equal to or greater than nt in the
        event_* file.
    fmin: Minimum frequency of the bandpass.
    fmax: Maximum frequency of the bandpass.
    filename: Output filename.
    """
    # - Make time axis and original Heaviside function. --------------------
    t = np.arange(0.0, float(nt + 1) * dt, dt)
    h = np.ones(len(t))

    # - Apply filters. ------------------------------------------------------
    h = flt.highpass(h, fmin, 1.0 / dt, 3, zerophase=False)
    h = flt.lowpass(h, fmax, 1.0 / dt, 5, zerophase=False)

    # - Plot output. ---------------------------------------------------------
    if plot:
        # - Time domain.
        plt.plot(t, h, 'k')
        plt.xlim(0.0, float(nt) * dt)
        plt.xlabel('time [s]')
        plt.title('source time function (time domain)')
        plt.show()
        # - Frequency domain.
        hf = np.fft.fft(h)
        f = np.fft.fftfreq(len(hf), dt)
        plt.semilogx(f, np.abs(hf), 'k')
        plt.plot([fmin, fmin], [0.0, np.max(np.abs(hf))], 'r--')
        plt.text(1.1 * fmin, 0.5 * np.max(np.abs(hf)), 'fmin')
        plt.plot([fmax, fmax], [0.0, np.max(np.abs(hf))], 'r--')
        plt.text(1.1 * fmax, 0.5 * np.max(np.abs(hf)), 'fmax')
        plt.xlim(0.1 * fmin, 10.0 * fmax)
        plt.xlabel('frequency [Hz]')
        plt.title('source time function (frequency domain)')
        plt.show()

    # - Write to file. ---------------------------------------------------------
    with open(filename, 'w') as f:
        # - Header.
        f.write('source time function, ses3d version 4.1\n')
        f.write('nt= ' + str(nt) + ', dt=' + str(dt) + '\n')
        f.write('filtered Heaviside, highpass(fmin=' + str(fmin) +
                ', corners=3, zerophase=False), lowpass(fmax=' + str(fmax) +
                ', corners=5, zerophase=False)\n')
        f.write('-- samples --\n')
        for k in range(len(h)):
            f.write(str(h[k]) + '\n')

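A minimal usage sketch for make_stf, writing to a hypothetical local path
instead of the hard-coded '../INPUT/stf_new' and skipping the plots:

make_stf(dt=0.10, nt=5000, fmin=1.0 / 100.0, fmax=1.0 / 8.0,
         filename='stf_test', plot=False)
# the output file holds a band-limited Heaviside: energy concentrated
# roughly between fmin = 0.01 Hz and fmax = 0.125 Hz
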
def process(tr, lowcut, highcut, filt_order, samp_rate, debug,
            starttime=False, clip=False, length=86400,
            seisan_chan_names=False, ignore_length=False, fill_gaps=True):
    """
    Basic function to process data, usually called by dayproc or shortproc.

    Functionally, this will bandpass, downsample and check headers and
    length of trace to ensure files start when they should and are the
    correct length.

    This is a simple wrapper on obspy functions, we include it here to
    provide a system to ensure all parts of the dataset are processed in
    the same way.

    .. note:: Usually this function is called via dayproc or shortproc.

    :type tr: obspy.core.trace.Trace
    :param tr: Trace to process
    :type lowcut: float
    :param lowcut: Low cut in Hz, if set to None and highcut is set, will
        use a lowpass filter.
    :type highcut: float
    :param highcut: High cut in Hz, if set to None and lowcut is set, will
        use a highpass filter.
    :type filt_order: int
    :param filt_order: Number of corners for filter.
    :type samp_rate: float
    :param samp_rate: Desired sampling rate in Hz.
    :type debug: int
    :param debug: Debug output level from 0-5, higher numbers = more output.
    :type starttime: obspy.core.utcdatetime.UTCDateTime
    :param starttime: Desired start of trace
    :type clip: bool
    :param clip: Whether to expect, and enforce a set length of data or not.
    :type length: float
    :param length: Use to set a fixed length for data from the given
        starttime.
    :type seisan_chan_names: bool
    :param seisan_chan_names:
        Whether channels are named like seisan channels (which are two
        letters rather than SEED convention of three) - defaults to False.
    :type ignore_length: bool
    :param ignore_length: See warning in dayproc.
    :type fill_gaps: bool
    :param fill_gaps: Whether to pad any gaps found with zeros or not.

    :return: Processed trace.
    :type: :class:`obspy.core.stream.Trace`
    """
    # Add sanity check
    if highcut and highcut >= 0.5 * samp_rate:
        raise IOError('Highcut must be lower than the nyquist')
    # Define the start-time
    if starttime:
        # Be nice and allow a datetime object.
        if isinstance(starttime, dt.date) or isinstance(starttime,
                                                        dt.datetime):
            starttime = UTCDateTime(starttime)
        day = starttime.date
    else:
        day = tr.stats.starttime.date
    debug_print('Working on: ' + tr.stats.station + '.' + tr.stats.channel,
                2, debug)
    if debug >= 5:
        tr.plot()
    # Check if the trace is gappy and pad if it is.
    gappy = False
    if isinstance(tr.data, np.ma.MaskedArray):
        gappy = True
        gaps, tr = _fill_gaps(tr)
    # Do a brute force quality check
    qual = _check_daylong(tr)
    if not qual:
        msg = ("Data have more zeros than actual data, please check the raw"
               " data set-up and manually sort it: " + tr.stats.station +
               "." + tr.stats.channel)
        raise ValueError(msg)
    tr = tr.detrend('simple')    # Detrend data before filtering
    debug_print('I have ' + str(len(tr.data)) + ' data points for ' +
                tr.stats.station + '.' + tr.stats.channel +
                ' before processing', 0, debug)
    # Sanity check to ensure files are daylong
    padded = False
    if float(tr.stats.npts / tr.stats.sampling_rate) != length and clip:
        debug_print('Data for ' + tr.stats.station + '.' +
                    tr.stats.channel +
                    ' are not of daylong length, will zero pad', 2, debug)
        if tr.stats.endtime - tr.stats.starttime < 0.8 * length\
           and not ignore_length:
            raise NotImplementedError(
                "Data for {0}.{1} is {2} hours long, which is less than 80 "
                "percent of the desired length, will not pad".format(
                    tr.stats.station, tr.stats.channel,
                    (tr.stats.endtime - tr.stats.starttime) / 3600))
        # trim, then calculate length of any pads required
        tr = tr.trim(starttime, starttime + length, nearest_sample=True)
        pre_pad_secs = tr.stats.starttime - starttime
        post_pad_secs = (starttime + length) - tr.stats.endtime
        if pre_pad_secs > 0 or post_pad_secs > 0:
            padded = True
            pre_pad = np.zeros(int(pre_pad_secs * tr.stats.sampling_rate))
            post_pad = np.zeros(int(post_pad_secs * tr.stats.sampling_rate))
            debug_print(str(tr), 2, debug)
            debug_print("Padding to day long with %f s before and %f s "
                        "at end" % (pre_pad_secs, post_pad_secs), 1, debug)
            tr.data = np.concatenate([pre_pad, tr.data, post_pad])
            # Use this rather than the expected pad because of rounding
            # samples
            tr.stats.starttime -= len(pre_pad) * tr.stats.delta
            debug_print(str(tr), 2, debug)
        # If there is one sample too many after this remove the first one
        # by convention
        if len(tr.data) == (length * tr.stats.sampling_rate) + 1:
            tr.data = tr.data[1:len(tr.data)]
        if not tr.stats.sampling_rate * length == tr.stats.npts:
            raise ValueError('Data are not daylong for ' +
                             tr.stats.station + '.' + tr.stats.channel)
        debug_print('I now have %i data points after enforcing length'
                    % len(tr.data), 0, debug)
    # Check sampling rate and resample
    if tr.stats.sampling_rate != samp_rate:
        debug_print('Resampling', 1, debug)
        tr.resample(samp_rate)
    # Filtering section
    tr = tr.detrend('simple')    # Detrend data again before filtering
    if highcut and lowcut:
        debug_print('Bandpassing', 1, debug)
        tr.data = bandpass(tr.data, lowcut, highcut,
                           tr.stats.sampling_rate, filt_order, True)
    elif highcut:
        debug_print('Lowpassing', 1, debug)
        tr.data = lowpass(tr.data, highcut, tr.stats.sampling_rate,
                          filt_order, True)
    elif lowcut:
        debug_print('Highpassing', 1, debug)
        tr.data = highpass(tr.data, lowcut, tr.stats.sampling_rate,
                           filt_order, True)
    else:
        debug_print('No filters applied', 2, debug)
    # Account for two letter channel names in s-files and therefore templates
    if seisan_chan_names:
        tr.stats.channel = tr.stats.channel[0] + tr.stats.channel[-1]
    # Sanity check the time header
    if tr.stats.starttime.day != day and clip:
        debug_print("Time headers do not match expected date: {0}".format(
            tr.stats.starttime), 2, debug)
    if padded:
        debug_print("Reapplying zero pads post processing", 1, debug)
        debug_print(str(tr), 2, debug)
        pre_pad = np.zeros(int(pre_pad_secs * tr.stats.sampling_rate))
        post_pad = np.zeros(int(post_pad_secs * tr.stats.sampling_rate))
        pre_pad_len = len(pre_pad)
        post_pad_len = len(post_pad)
        debug_print("Taking only valid data between %i and %i samples" %
                    (pre_pad_len, len(tr.data) - post_pad_len), 1, debug)
        # Re-apply the pads, taking only the data section that was valid
        tr.data = np.concatenate([
            pre_pad, tr.data[pre_pad_len:len(tr.data) - post_pad_len],
            post_pad])
        debug_print(str(tr), 2, debug)
    # Sanity check to ensure files are daylong
    if float(tr.stats.npts / tr.stats.sampling_rate) != length and clip:
        debug_print('Data for ' + tr.stats.station + '.' +
                    tr.stats.channel +
                    ' are not of daylong length, will zero pad', 1, debug)
        # Use obspy's trim function with zero padding
        tr = tr.trim(starttime, starttime + length, pad=True, fill_value=0,
                     nearest_sample=True)
        # If there is one sample too many after this remove the last one
        # by convention
        if len(tr.data) == (length * tr.stats.sampling_rate) + 1:
            tr.data = tr.data[1:len(tr.data)]
        if not tr.stats.sampling_rate * length == tr.stats.npts:
            raise ValueError('Data are not daylong for ' +
                             tr.stats.station + '.' + tr.stats.channel)
    # Replace the gaps with zeros
    if gappy:
        tr = _zero_pad_gaps(tr, gaps, fill_gaps=fill_gaps)
    # Final visual check for debug
    if debug > 4:
        tr.plot()
    return tr

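A hedged usage sketch for the process wrapper above, assuming the
surrounding module's helpers (_check_daylong, _fill_gaps, debug_print and
friends) are importable, and using the example trace bundled with obspy:

from obspy import read

tr = read()[0]                     # 100 Hz demo trace shipped with obspy
tr_proc = process(tr.copy(), lowcut=2.0, highcut=9.0, filt_order=4,
                  samp_rate=20.0, debug=0, starttime=False, clip=False)
# tr_proc has been detrended, resampled to 20 Hz and 2-9 Hz bandpassed
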
def crossc(dstart, dend, ch1, ch2, day):
    # here you load all the functions you need to use
    from obspy.seg2.seg2 import readSEG2
    from obspy.core import Stream
    import numpy as np
    from obspy.signal.cross_correlation import xcorr
    from numpy import sign
    from obspy.signal.filter import lowpass
    from obspy.signal.filter import highpass
    from obspy.signal.filter import bandstop
    from obspy.signal.filter import bandpass

    dataDir = "/import/three-data/hadzii/STEINACH/STEINACH_longtime/"
    outdir = ("/home/jsalvermoser/Desktop/Processing/bands_SNR/" +
              "CH" + str(ch1) + "_CH" + str(ch2) + "/" +
              "JAN" + str(day) + "/")

    # loading the info for outfile-name
    stream_start = readSEG2(dataDir + str(dstart) + ".dat")
    t_start = stream_start[ch1].stats.seg2.ACQUISITION_TIME
    stream_end = readSEG2(dataDir + str(dend) + ".dat")
    t_end = stream_end[ch1].stats.seg2.ACQUISITION_TIME

    # initialization of the arrays and variables
    TR = []
    rms = []
    sq = []
    ncalm = 1
    nbeat = 1
    corr128_calm = 0
    corr128_beat = 0
    nerror = 0
    # kept from the original, unused below
    mu1c = mu2c = mu3c = mu1b = mu2b = mu3b = 0
    var1c = var2c = var3c = var1b = var2b = var3b = 0
    SNR_calm_b1 = []
    SNR_calm_b2 = []
    SNR_calm_b3 = []
    SNR_beat_b1 = []
    SNR_beat_b2 = []
    SNR_beat_b3 = []

    # TAPER (time_vector is assumed to be defined at module level)
    taper_percentage = 0.05
    taper = np.blackman(int(len(time_vector) * taper_percentage))
    taper_left, taper_right = np.array_split(taper, 2)
    taper = np.concatenate([taper_left,
                            np.ones(len(time_vector) - len(taper)),
                            taper_right])

    for j in range(0, dend - dstart):
        sq.append([])

    for k in range(dstart, dend, 4):
        start = k
        end = k + 5    # only used to merge 5-1 = 4 files to one stream
        # sometimes channels seem to fail, so this try block prevents
        # crashing of the program
        try:
            st1 = merge_single(ch1, start, end)
            st2 = merge_single(ch2, start, end)
            st1.detrend('linear')
            st2.detrend('linear')

            # calculate squares for rms
            r = k - dstart
            sq[r] = 0
            for h in range(0, 64000):
                sq[r] += (st1[0].data[h]) ** 2

            # filter both channels before the cross correlation
            st1.filter('lowpass', freq=24, zerophase=True, corners=8)
            st1.filter('highpass', freq=0.05, zerophase=True,
                       corners=2)    # had to be reduced from 0.1 Hz
            st1.filter('bandstop', freqmin=8, freqmax=14, corners=4,
                       zerophase=True)
            st2.filter('lowpass', freq=24, zerophase=True, corners=8)
            st2.filter('highpass', freq=0.05, zerophase=True,
                       corners=2)    # had to be reduced from 0.1 Hz
            st2.filter('bandstop', freqmin=8, freqmax=14, corners=4,
                       zerophase=True)

            # 1-bit normalization
            tr1 = sign(st1[0].data)
            tr2 = sign(st2[0].data)

            # cross-correlation
            index, value, acorr = xcorr(tr1, tr2, 25000, full_xcorr=True)
            print(sq[r])

            # check sanity
            if np.max(acorr) > 1:
                acorr = np.zeros(50001)

            # sort the 128 s files into calm and beat:
            # the value was chosen after observing calm files
            if sq[r] < 1000000000000:
                corr128_calm += acorr
                ncalm += 1.
            else:
                corr128_beat += acorr
                nbeat += 1.
            print(ncalm, nbeat)    # just to check if calm or noisy
        except Exception:
            nerror += 1
            print("%d : ERROR" % r)

    # normalization
    if ncalm < 8:
        corr128_calm = np.zeros(50001)
    else:
        corr128_calm = (corr128_calm / ncalm) * taper
    corr128_beat = (corr128_beat / nbeat) * taper

    # filter again and divide into 3 bands which can be investigated
    # separately
    corr128_calm_band1 = highpass(corr128_calm, freq=0.1, corners=4,
                                  zerophase=True, df=500.)
    corr128_calm_band1 = lowpass(corr128_calm_band1, freq=2, corners=4,
                                 zerophase=True, df=500.)
    corr128_calm_band2 = bandpass(corr128_calm, freqmin=2, freqmax=8,
                                  df=500., corners=4, zerophase=True)
    corr128_calm_band3 = bandpass(corr128_calm, freqmin=8, freqmax=24,
                                  df=500., corners=4, zerophase=True)
    corr128_beat_band1 = highpass(corr128_beat, freq=0.1, df=500.,
                                  corners=4, zerophase=True)
    corr128_beat_band1 = lowpass(corr128_beat_band1, freq=2, corners=4,
                                 zerophase=True, df=500.)
    corr128_beat_band2 = bandpass(corr128_beat, freqmin=2, freqmax=8,
                                  df=500., corners=4, zerophase=True)
    corr128_beat_band3 = bandpass(corr128_beat, freqmin=8, freqmax=24,
                                  df=500., corners=4, zerophase=True)

    # SNR (Signal-to-Noise Ratio):
    # for the signal-to-noise ratio one divides the maximum of the signal
    # by the variance of a late window (noise). As we don't know which
    # window has the lowest signal fraction, we loop over some windows.
    # We need windows of different lengths for the different bands as
    # different frequencies are contained. For every band the minimum
    # frequency fmin is chosen (e.g. 4 Hz), then the time for one cycle
    # is 1/fmin (e.g. 0.25 s) and as we take windows of 3-4 cycles we
    # choose a window length of 4 * 0.25 s = 1 s.

    # CALM + BEAT
    for isnrb1 in range(45000, 50000, 2500):    # steps of half a window length
        endwb1 = isnrb1 + 2500    # 5 s window
        SNR_calm_b1.append(np.max(np.abs(corr128_calm_band1)) /
                           np.std(corr128_calm_band1[isnrb1:endwb1]))
        SNR_beat_b1.append(np.max(np.abs(corr128_beat_band1)) /
                           np.std(corr128_beat_band1[isnrb1:endwb1]))
    SNR_calm_b1 = max(SNR_calm_b1)
    SNR_beat_b1 = max(SNR_beat_b1)

    for isnrb2 in range(45000, 49001, 500):    # steps of half a window length
        endwb2 = isnrb2 + 1000    # 2 s windows
        SNR_calm_b2.append(np.max(np.abs(corr128_calm_band2)) /
                           np.std(corr128_calm_band2[isnrb2:endwb2]))
        SNR_beat_b2.append(np.max(np.abs(corr128_beat_band2)) /
                           np.std(corr128_beat_band2[isnrb2:endwb2]))
    SNR_beat_b2 = max(SNR_beat_b2)
    SNR_calm_b2 = max(SNR_calm_b2)

    for isnrb3 in range(45000, 49751, 125):    # steps of half a window length
        endwb3 = isnrb3 + 250    # 0.5 s windows
        SNR_calm_b3.append(np.max(np.abs(corr128_calm_band3)) /
                           np.std(corr128_calm_band3[isnrb3:endwb3]))
        SNR_beat_b3.append(np.max(np.abs(corr128_beat_band3)) /
                           np.std(corr128_beat_band3[isnrb3:endwb3]))
    SNR_beat_b3 = max(SNR_beat_b3)
    SNR_calm_b3 = max(SNR_calm_b3)

    if ncalm < 8:
        SNR_calm_b1 = 0
        SNR_calm_b2 = 0
        SNR_calm_b3 = 0

    print(SNR_calm_b1, SNR_calm_b2, SNR_calm_b3)
    print(SNR_beat_b1, SNR_beat_b2, SNR_beat_b3)

    # RMS for histogram and sifting:
    # for s in range(0, dend - dstart):
    #     rms.append((sq[s] / 16000) ** 0.5)

    # save into files:
    np.save(outdir + t_start + "-" + t_end + "CH" + str(ch1) + "_" +
            "xcorr128s_beat_0-2Hz" + "_" + "CH" + str(ch2),
            corr128_beat_band1)
    np.save(outdir + t_start + "-" + t_end + "CH" + str(ch1) + "_" +
            "xcorr128s_beat_2-8Hz" + "_" + "CH" + str(ch2),
            corr128_beat_band2)
    np.save(outdir + t_start + "-" + t_end + "CH" + str(ch1) + "_" +
            "xcorr128s_beat_8-24Hz" + "_" + "CH" + str(ch2),
            corr128_beat_band3)
    np.save(outdir + t_start + "-" + t_end + "CH" + str(ch1) + "_" +
            "xcorr128s_calm_0-2Hz" + "_" + "CH" + str(ch2),
            corr128_calm_band1)
    np.save(outdir + t_start + "-" + t_end + "CH" + str(ch1) + "_" +
            "xcorr128s_calm_2-8Hz" + "_" + "CH" + str(ch2),
            corr128_calm_band2)
    np.save(outdir + t_start + "-" + t_end + "CH" + str(ch1) + "_" +
            "xcorr128s_calm_8-24Hz" + "_" + "CH" + str(ch2),
            corr128_calm_band3)
    # np.save(outdir + "JAN_" + "CH" + str(ch1) + "_" + "RMS" + "_" +
    #         "CH" + str(ch2) + str(dstart) + "-" + str(dend), rms)

    return (corr128_beat_band1, corr128_beat_band2, corr128_beat_band3,
            corr128_calm_band1, corr128_calm_band2, corr128_calm_band3,
            ncalm, nbeat,
            SNR_beat_b1, SNR_beat_b2, SNR_beat_b3,
            SNR_calm_b1, SNR_calm_b2, SNR_calm_b3)

def process(tr, lowcut, highcut, filt_order, samp_rate, debug,
            starttime=False, full_day=False):
    r"""Basic function to bandpass, downsample and check headers and
    length of trace to ensure files start at the start of a day and are
    daylong.

    Works in place on data. This is employed to ensure all parts of the
    data are processed in the same way.

    .. note:: Usually this function is called via dayproc or shortproc.

    :type tr: obspy.Trace
    :param tr: Trace to process
    :type highcut: float
    :param highcut: High cut in Hz, if set to None and lowcut is set, will
        use a highpass filter.
    :type lowcut: float
    :param lowcut: Low cut in Hz, if set to None and highcut is set, will
        use a lowpass filter.
    :type filt_order: int
    :param filt_order: Number of corners for filter.
    :type samp_rate: float
    :param samp_rate: Desired sampling rate in Hz
    :type debug: int
    :param debug: Debug output level from 0-5, higher numbers = more output
    :type starttime: obspy.UTCDateTime
    :param starttime: Desired start of trace
    :type full_day: bool
    :param full_day: Whether to expect, and enforce a full day of data
        or not.

    :return: obspy.Stream

    .. note:: Will convert channel names to two characters long.
    """
    import warnings
    from obspy.signal.filter import bandpass, lowpass, highpass
    # Add sanity check
    if highcut and highcut >= 0.5 * samp_rate:
        raise IOError('Highcut must be lower than the nyquist')
    # Define the start-time
    if starttime:
        day = starttime.date
    else:
        day = tr.stats.starttime.date
    if debug >= 2:
        print('Working on: ' + tr.stats.station + '.' + tr.stats.channel)
    if debug >= 5:
        tr.plot()
    # Do a brute force quality check
    qual = _check_daylong(tr)
    if not qual:
        msg = ("Data have more zeros than actual data, please check the raw"
               " data set-up and manually sort it")
        raise ValueError(msg)
    tr = tr.detrend('simple')    # Detrend data before filtering
    # If there is one sample too many remove the first sample - this occurs
    # at station FOZ where the first sample is zero when it shouldn't be,
    # Not real sample: generated during data download
    # if full_day:
    #     if len(tr.data) == (86400 * tr.stats.sampling_rate) + 1:
    #         tr.data = tr.data[1:len(tr.data)]
    if debug > 0:
        print('I have ' + str(len(tr.data)) + ' data points for ' +
              tr.stats.station + '.' + tr.stats.channel +
              ' before processing')
    # Sanity check to ensure files are daylong
    if float(tr.stats.npts / tr.stats.sampling_rate) != 86400.0\
       and full_day:
        if debug >= 2:
            print('Data for ' + tr.stats.station + '.' + tr.stats.channel +
                  ' is not of daylong length, will zero pad')
        # Work out when the trace thinks it is starting
        # traceday = UTCDateTime(str(tr.stats.starttime.year) + '-' +
        #                        str(tr.stats.starttime.month) + '-' +
        #                        str(tr.stats.starttime.day))
        # Use obspy's trim function with zero padding
        tr = tr.trim(starttime, starttime + 86400, pad=True, fill_value=0,
                     nearest_sample=True)
        # If there is one sample too many after this remove the last one
        # by convention
        if len(tr.data) == (86400 * tr.stats.sampling_rate) + 1:
            tr.data = tr.data[1:len(tr.data)]
        if not tr.stats.sampling_rate * 86400 == tr.stats.npts:
            raise ValueError('Data are not daylong for ' +
                             tr.stats.station + '.' + tr.stats.channel)
        print('I now have ' + str(len(tr.data)) +
              ' data points after enforcing day length')
    # Check sampling rate and resample
    if tr.stats.sampling_rate != samp_rate:
        if debug >= 2:
            print('Resampling')
        tr.resample(samp_rate)
    # Filtering section
    tr = tr.detrend('simple')    # Detrend data again before filtering
    if highcut and lowcut:
        if debug >= 2:
            print('Bandpassing')
        tr.data = bandpass(tr.data, lowcut, highcut,
                           tr.stats.sampling_rate, filt_order, True)
    elif highcut:
        if debug >= 2:
            print('Lowpassing')
        tr.data = lowpass(tr.data, highcut, tr.stats.sampling_rate,
                          filt_order, True)
    elif lowcut:
        if debug >= 2:
            print('Highpassing')
        tr.data = highpass(tr.data, lowcut, tr.stats.sampling_rate,
                           filt_order, True)
    else:
        warnings.warn('No filters applied')
    # Account for two letter channel names in s-files and therefore templates
    tr.stats.channel = tr.stats.channel[0] + tr.stats.channel[-1]
    # Sanity check the time header
    if tr.stats.starttime.day != day and full_day:
        warnings.warn("Time headers do not match expected date: " +
                      str(tr.stats.starttime))
    # Sanity check to ensure files are daylong
    if float(tr.stats.npts / tr.stats.sampling_rate) != 86400.0 and full_day:
        if debug >= 2:
            print('Data for ' + tr.stats.station + '.' + tr.stats.channel +
                  ' is not of daylong length, will zero pad')
        # Use obspy's trim function with zero padding
        tr = tr.trim(starttime, starttime + 86400, pad=True, fill_value=0,
                     nearest_sample=True)
        # If there is one sample too many after this remove the last one
        # by convention
        if len(tr.data) == (86400 * tr.stats.sampling_rate) + 1:
            tr.data = tr.data[1:len(tr.data)]
        if not tr.stats.sampling_rate * 86400 == tr.stats.npts:
            raise ValueError('Data are not daylong for ' +
                             tr.stats.station + '.' + tr.stats.channel)
    # Final visual check for debug
    if debug >= 4:
        tr.plot()
    return tr

def stalta_eventwindow_function(data, f1=0, f2=0, wsta=10, wlta=60,
                                thrON=3.0, thrOFF=2.0):
    # function computing STA/LTA of the data channel
    # data = [times values], with times in seconds
    # f1, f2: corner frequencies of the bandpass signal
    #   if f2 = 0 assume a high pass
    #   if f1 = 0 assume a low pass
    # wsta is the STA window size in seconds
    # wlta is the LTA window size in seconds
    # thrON and thrOFF are the trigger values for STA/LTA
    # on output stalta = [times stalta]
    # on output events = [event_start_time event_end_time SNR=max_stalta/1.0]
    import numpy
    import obspy.signal.filter as obspy_filter
    from obspy.signal import trigger

    # start function computations
    stalta = numpy.zeros(data.shape)
    wavesor = data[:, 1]
    times = data[:, 0]
    dt = float(times[2] - times[1])
    fs = 1 / dt
    nsta = int(wsta / dt)
    nlta = int(wlta / dt)
    waves = numpy.zeros(wavesor.shape)
    waves[:] = wavesor[:]

    # filtering
    if (f1 > 0) & (f2 > 0):
        waves = obspy_filter.bandpass(wavesor, f1, f2, fs, corners=4,
                                      zerophase=True)
    elif (f1 > 0) & (f2 <= 0):
        waves = obspy_filter.highpass(wavesor, f1, fs, corners=4,
                                      zerophase=True)
    elif (f1 <= 0) & (f2 > 0):
        waves = obspy_filter.lowpass(wavesor, f2, fs, corners=4,
                                     zerophase=True)

    # print("Calculating STA/LTA...")
    stalta_comp = trigger.classic_sta_lta(waves, nsta, nlta)
    eventlist = trigger.trigger_onset(stalta_comp, thrON, thrOFF,
                                      max_len=9e+99, max_len_delete=False)
    nev = len(eventlist)
    if nev > 0:
        eventtimes = eventlist * dt + times[0]
        events = numpy.zeros((nev, 3))
        events[:, 0:2:1] = eventtimes[:, 0:2:1]
        for n in range(0, nev):
            n1 = eventlist[n, 0]
            n2 = eventlist[n, 1]
            events[n, 2] = numpy.amax(stalta_comp[n1:n2 + 1:1]) / 1.0
    else:
        events = numpy.zeros((nev, 3))

    stalta[:, 0] = times
    stalta[:, 1] = stalta_comp
    return stalta, events

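A small usage sketch for the STA/LTA helper above, on a synthetic 100 Hz
record containing a single transient; amplitudes and thresholds are
illustrative assumptions.

import numpy as np

fs = 100.0
t = np.arange(0, 600, 1 / fs)                  # ten minutes of data
v = np.random.randn(t.size) * 0.1
v[30000:30200] += 5.0                          # transient at t = 300 s
rec = np.column_stack([t, v])
stalta, events = stalta_eventwindow_function(rec, f1=1.0, f2=10.0,
                                             wsta=1, wlta=30,
                                             thrON=4.0, thrOFF=1.5)
# each row of `events` is [onset_time, end_time, peak STA/LTA]
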
# taper the correlation functions (time_vector is assumed to be defined
# at module level; taper_percentage and corr were missing in this excerpt
# and are initialised here as in the companion crossc function)
taper_percentage = 0.05
taper = np.blackman(int(len(time_vector) * taper_percentage))
taper_left, taper_right = np.array_split(taper, 2)
taper = np.concatenate([taper_left,
                        np.ones(len(time_vector) - len(taper)),
                        taper_right])

corr = 0
ax = plt.subplot(111)

for k in range(1048728, 1048840, 4):
    end = k + 4
    print(end)
    tr1 = merge_single(6, k, end)
    tr2 = merge_single(7, k, end)
    tr1.detrend('linear')
    tr2.detrend('linear')
    tr1.filter('lowpass', freq=24, zerophase=True, corners=8)
    tr1.filter('highpass', freq=0.05, zerophase=True, corners=2)
    tr1.filter('bandstop', freqmin=8, freqmax=14, corners=4, zerophase=True)
    tr2.filter('lowpass', freq=24, zerophase=True, corners=8)
    tr2.filter('highpass', freq=0.05, zerophase=True, corners=2)
    tr2.filter('bandstop', freqmin=8, freqmax=14, corners=4, zerophase=True)
    tr1 = sign(tr1.data)
    tr2 = sign(tr2.data)
    index, value, acorr = xcorr(tr1, tr2, 25000, full_xcorr=True)
    acorr = acorr * taper
    acorr = highpass(acorr, freq=0.1, corners=4, zerophase=True, df=500.)
    acorr = lowpass(acorr, freq=2, corners=4, zerophase=True, df=500.)
    ax.plot(time_vector, acorr / np.max(acorr) + k - 1048728)
    corr += acorr

ax.plot(time_vector, corr / np.max(corr) - 4)
plt.show()

def process(tr, lowcut, highcut, filt_order, samp_rate, starttime=False,
            clip=False, length=86400, seisan_chan_names=False,
            ignore_length=False, fill_gaps=True, ignore_bad_data=False,
            fft_threads=1):
    """
    Basic function to process data, usually called by dayproc or shortproc.

    Functionally, this will bandpass, downsample and check headers and
    length of trace to ensure files start when they should and are the
    correct length.

    This is a simple wrapper on obspy functions, we include it here to
    provide a system to ensure all parts of the dataset are processed in
    the same way.

    .. note:: Usually this function is called via dayproc or shortproc.

    :type tr: obspy.core.trace.Trace
    :param tr: Trace to process
    :type lowcut: float
    :param lowcut: Low cut in Hz, if set to None and highcut is set, will
        use a lowpass filter.
    :type highcut: float
    :param highcut: High cut in Hz, if set to None and lowcut is set, will
        use a highpass filter.
    :type filt_order: int
    :param filt_order: Number of corners for filter.
    :type samp_rate: float
    :param samp_rate: Desired sampling rate in Hz.
    :type starttime: obspy.core.utcdatetime.UTCDateTime
    :param starttime: Desired start of trace
    :type clip: bool
    :param clip: Whether to expect, and enforce a set length of data or not.
    :type length: float
    :param length: Use to set a fixed length for data from the given
        starttime.
    :type seisan_chan_names: bool
    :param seisan_chan_names:
        Whether channels are named like seisan channels (which are two
        letters rather than SEED convention of three) - defaults to False.
    :type ignore_length: bool
    :param ignore_length: See warning in dayproc.
    :type fill_gaps: bool
    :param fill_gaps: Whether to pad any gaps found with zeros or not.
    :type ignore_bad_data: bool
    :param ignore_bad_data:
        If False (default), errors will be raised if data are excessively
        gappy or are mostly zeros. If True then no error will be raised,
        but an empty trace will be returned.
    :type fft_threads: int
    :param fft_threads: Number of threads to use for pyFFTW FFT in resampling

    :return: Processed trace.
    :type: :class:`obspy.core.stream.Trace`

    .. note::
        If your data contain gaps you should *NOT* fill those gaps before
        using the pre-process functions. The pre-process functions will
        fill the gaps internally prior to processing, process the data,
        then re-fill the gaps with zeros to ensure correlations are not
        incorrectly calculated within gaps. If your data have gaps you
        should pass a merged stream without the `fill_value` argument
        (e.g.: `tr = tr.merge()`).
    """
    # Add sanity check
    if highcut and highcut >= 0.5 * samp_rate:
        raise IOError('Highcut must be lower than the nyquist')
    # Define the start-time
    if starttime:
        # Be nice and allow a datetime object.
        if isinstance(starttime, dt.date) or isinstance(starttime,
                                                        dt.datetime):
            starttime = UTCDateTime(starttime)
    Logger.debug('Working on: {0}'.format(tr.id))
    # Check if the trace is gappy and pad if it is.
    gappy = False
    if isinstance(tr.data, np.ma.MaskedArray):
        gappy = True
        gaps, tr = _fill_gaps(tr)
    # Do a brute force quality check
    qual = _check_daylong(tr)
    if not qual:
        msg = ("Data have more zeros than actual data, please check the raw"
               " data set-up and manually sort it: " + tr.stats.station +
               "." + tr.stats.channel)
        if not ignore_bad_data:
            raise ValueError(msg)
        else:
            Logger.warning(msg)
            return Trace(data=np.array([]), header={
                "station": tr.stats.station, "channel": tr.stats.channel,
                "network": tr.stats.network, "location": tr.stats.location,
                "starttime": tr.stats.starttime,
                "sampling_rate": tr.stats.sampling_rate})
    tr = tr.detrend('simple')    # Detrend data before filtering
    Logger.debug('I have {0} data points for {1} before processing'.format(
        tr.stats.npts, tr.id))
    # Sanity check to ensure files are daylong
    padded = False
    if clip:
        tr = tr.trim(starttime, starttime + length, nearest_sample=True)
    if float(tr.stats.npts / tr.stats.sampling_rate) != length and clip:
        Logger.info(
            'Data for {0} are not long enough, will zero pad'.format(tr.id))
        if tr.stats.endtime - tr.stats.starttime < 0.8 * length\
           and not ignore_length:
            msg = (
                "Data for {0}.{1} is {2:.2f} seconds long, which is less "
                "than 80 percent of the desired length ({3} seconds), will "
                "not pad".format(
                    tr.stats.station, tr.stats.channel,
                    tr.stats.endtime - tr.stats.starttime, length))
            if not ignore_bad_data:
                raise NotImplementedError(msg)
            else:
                Logger.warning(msg)
                return Trace(data=np.array([]), header={
                    "station": tr.stats.station,
                    "channel": tr.stats.channel,
                    "network": tr.stats.network,
                    "location": tr.stats.location,
                    "starttime": tr.stats.starttime,
                    "sampling_rate": tr.stats.sampling_rate})
        # trim, then calculate length of any pads required
        pre_pad_secs = tr.stats.starttime - starttime
        post_pad_secs = (starttime + length) - tr.stats.endtime
        if pre_pad_secs > 0 or post_pad_secs > 0:
            padded = True
            pre_pad = np.zeros(int(pre_pad_secs * tr.stats.sampling_rate))
            post_pad = np.zeros(int(post_pad_secs * tr.stats.sampling_rate))
            Logger.debug(str(tr))
            Logger.debug("Padding to day long with {0} s before and {1} s "
                         "at end".format(pre_pad_secs, post_pad_secs))
            tr.data = np.concatenate([pre_pad, tr.data, post_pad])
            # Use this rather than the expected pad because of rounding
            # samples
            tr.stats.starttime -= len(pre_pad) * tr.stats.delta
            Logger.debug(str(tr))
        # If there is one sample too many after this remove the first one
        # by convention
        if len(tr.data) == (length * tr.stats.sampling_rate) + 1:
            tr.data = tr.data[1:len(tr.data)]
        if tr.stats.sampling_rate * length != tr.stats.npts:
            raise ValueError('Data are not long enough for ' + tr.stats.id)
        Logger.debug(
            'I now have {0} data points after enforcing length'.format(
                tr.stats.npts))
    # Check sampling rate and resample
    if tr.stats.sampling_rate != samp_rate:
        Logger.debug('Resampling')
        tr = _resample(tr, samp_rate, threads=fft_threads)
    # Filtering section
    tr = tr.detrend('simple')    # Detrend data again before filtering
    if highcut and lowcut:
        Logger.debug('Bandpassing')
        tr.data = bandpass(tr.data, lowcut, highcut,
                           tr.stats.sampling_rate, filt_order, True)
    elif highcut:
        Logger.debug('Lowpassing')
        tr.data = lowpass(tr.data, highcut, tr.stats.sampling_rate,
                          filt_order, True)
    elif lowcut:
        Logger.debug('Highpassing')
        tr.data = highpass(tr.data, lowcut, tr.stats.sampling_rate,
                           filt_order, True)
    else:
        Logger.warning('No filters applied')
    # Account for two letter channel names in s-files and therefore templates
    if seisan_chan_names:
        tr.stats.channel = tr.stats.channel[0] + tr.stats.channel[-1]
    if padded:
        Logger.debug("Reapplying zero pads post processing")
        Logger.debug(str(tr))
        pre_pad = np.zeros(int(pre_pad_secs * tr.stats.sampling_rate))
        post_pad = np.zeros(int(post_pad_secs * tr.stats.sampling_rate))
        pre_pad_len = len(pre_pad)
        post_pad_len = len(post_pad)
        Logger.debug("Taking only valid data between {0} and {1} "
                     "samples".format(pre_pad_len,
                                      tr.stats.npts - post_pad_len))
        # Re-apply the pads, taking only the data section that was valid
        tr.data = np.concatenate(
            [pre_pad, tr.data[pre_pad_len: len(tr.data) - post_pad_len],
             post_pad])
        Logger.debug(str(tr))
    # Sanity check to ensure files are daylong
    if float(tr.stats.npts / tr.stats.sampling_rate) != length and clip:
        Logger.info(
            'Data for {0} are not of daylong length, will zero pad'.format(
                tr.id))
        # Use obspy's trim function with zero padding
        tr = tr.trim(starttime, starttime + length, pad=True, fill_value=0,
                     nearest_sample=True)
        # If there is one sample too many after this remove the last one
        # by convention
        if len(tr.data) == (length * tr.stats.sampling_rate) + 1:
            tr.data = tr.data[1:len(tr.data)]
        if not tr.stats.sampling_rate * length == tr.stats.npts:
            raise ValueError('Data are not daylong for ' +
                             tr.stats.station + '.' + tr.stats.channel)
    # Replace the gaps with zeros
    if gappy:
        tr = _zero_pad_gaps(tr, gaps, fill_gaps=fill_gaps)
    return tr

def create_kiknet_acc(recid, path_kiknet_folder, fminNS2, fmaxNS2, fminEW2,
                      fmaxEW2):
    """
    KiK-net acc are stored within Database_small.hdf5 file
    """
    # Import libraries
    import sys
    import re
    import numpy as np
    from obspy.core import Trace, UTCDateTime
    from obspy.signal import filter

    time1 = []
    time2 = []
    inp_acc1 = []
    inp_acc2 = []
    npts1 = []
    npts2 = []
    for i in range(1, 3):
        if i == 1:
            comp = 'EW2'
            fmin = fminEW2
            fmax = fmaxEW2
        elif i == 2:
            comp = 'NS2'
            fmin = fminNS2
            fmax = fmaxNS2
        file_acc = (path_kiknet_folder + '/' + str(recid) + '/' +
                    str(recid) + '.' + comp)
        hdrnames = ['Origin Time', 'Lat.', 'Long.', 'Depth. (km)', 'Mag.',
                    'Station Code', 'Station Lat.', 'Station Long.',
                    'Station Height(m)', 'Record Time',
                    'Sampling Freq(Hz)', 'Duration Time(s)', 'Dir.',
                    'Scale Factor', 'Max. Acc. (gal)', 'Last Correction',
                    'Memo.']
        acc_data = []
        time = []
        with open(file_acc, 'r') as f:
            content = f.readlines()
        counter = 0
        for line in content:
            if counter < 17:
                if not line.startswith(hdrnames[counter]):
                    sys.exit("Expected line to start with %s but got %s "
                             % (hdrnames[counter], line))
                else:
                    flds = line.split()
                    if counter == 0:
                        origin_time = flds[2] + ' ' + flds[3]
                        origin_time = UTCDateTime.strptime(
                            origin_time, '%Y/%m/%d %H:%M:%S')
                        # All times are in Japanese standard time which is
                        # 9 hours ahead of UTC
                        origin_time -= 9 * 3600.
                    elif counter == 1:
                        lat = float(flds[1])
                    elif counter == 2:
                        lon = float(flds[1])
                    elif counter == 3:
                        dp = float(flds[2])
                    elif counter == 4:
                        mag = float(flds[1])
                    elif counter == 5:
                        stnm = flds[2]
                    elif counter == 6:
                        stla = float(flds[2])
                    elif counter == 7:
                        stlo = float(flds[2])
                    elif counter == 8:
                        stel = float(flds[2])
                    elif counter == 9:
                        record_time = flds[2] + ' ' + flds[3]
                        # A 15 s delay is added to the record time by the
                        # K-NET and KiK-Net data logger
                        record_time = UTCDateTime.strptime(
                            record_time, '%Y/%m/%d %H:%M:%S') - 15.0
                        # All times are in Japanese standard time which is
                        # 9 hours ahead of UTC
                        record_time -= 9 * 3600.
                    elif counter == 10:
                        freqstr = flds[2]
                        m = re.search('[0-9]*', freqstr)
                        freq = int(m.group())
                    elif counter == 11:
                        duration = float(flds[2])
                    elif counter == 12:
                        channel = flds[1].replace('-', '')
                        kiknetcomps = {'1': 'NS1', '2': 'EW1', '3': 'UD1',
                                       '4': 'NS2', '5': 'EW2', '6': 'UD2'}
                        # kiknet directions are 1-6
                        if channel.strip() in kiknetcomps:
                            channel = kiknetcomps[channel.strip()]
                    elif counter == 13:
                        eqn = flds[2]
                        num, denom = eqn.split('/')
                        num = float(re.search('[0-9]*', num).group())
                        denom = float(denom)
                        # convert the calibration from gal to m/s^2
                        calib = 0.01 * num / denom
                    elif counter == 14:
                        accmax = float(flds[3])
                    elif counter == 15:
                        last_correction = flds[2] + ' ' + flds[3]
                        last_correction = UTCDateTime.strptime(
                            last_correction, '%Y/%m/%d %H:%M:%S')
                        # All times are in Japanese standard time which is
                        # 9 hours ahead of UTC
                        last_correction -= 9 * 3600.
            elif counter > 16:
                data = str(line).split()
                for value in data:
                    a = float(value)
                    acc_data.append(a)
            counter = counter + 1
        data = np.array(acc_data)
        tr = Trace(data)
        tr.detrend("linear")
        tr.taper(max_percentage=0.05, type='cosine', side='both')
        # zero-pad so the zero-phase filter has room to ring out, then
        # strip the pads again after filtering
        filter_order = 4
        pad = np.zeros(int(round(1.5 * filter_order / fmin * freq)))
        tr.data = np.concatenate([pad, tr.data, pad])
        fN = freq / 2
        if fmax < fN:
            tr.data = filter.bandpass(tr.data, freqmin=fmin, freqmax=fmax,
                                      df=freq, corners=4, zerophase=True)
        else:
            tr.data = filter.highpass(tr.data, freq=fmin, df=freq,
                                      corners=4, zerophase=True)
        tr.data = tr.data[len(pad):len(tr.data) - len(pad)]
        tr.data = tr.data * calib / 9.81    # in g
        npts = len(tr.data)
        time = np.arange(npts) / freq
        if i == 1:
            inp_acc1 = tr.data
            npts1 = npts
            time1 = time
        if i == 2:
            inp_acc2 = tr.data
            npts2 = npts
            time2 = time
    return time1, time2, inp_acc1, inp_acc2, npts1, npts2

def process(tr, lowcut, highcut, filt_order, samp_rate, debug,
            starttime=False, clip=False, length=86400,
            seisan_chan_names=True, ignore_length=False):
    """
    Basic function to process data, usually called by dayproc or shortproc.

    Functionally, this will bandpass, downsample and check headers and
    length of trace to ensure files start at the start of a day and are
    daylong.

    This is a simple wrapper on obspy functions, we include it here to
    provide a system to ensure all parts of the dataset are processed in
    the same way.

    .. note:: Usually this function is called via dayproc or shortproc.

    :type tr: obspy.core.trace.Trace
    :param tr: Trace to process
    :type lowcut: float
    :param lowcut: Low cut in Hz, if set to None and highcut is set, will
        use a lowpass filter.
    :type highcut: float
    :param highcut: High cut in Hz, if set to None and lowcut is set, will
        use a highpass filter.
    :type filt_order: int
    :param filt_order: Number of corners for filter.
    :type samp_rate: float
    :param samp_rate: Desired sampling rate in Hz.
    :type debug: int
    :param debug: Debug output level from 0-5, higher numbers = more output.
    :type starttime: obspy.core.utcdatetime.UTCDateTime
    :param starttime: Desired start of trace
    :type clip: bool
    :param clip: Whether to expect, and enforce a set length of data or not.
    :type length: float
    :param length: Use to set a fixed length for data from the given
        starttime.
    :type seisan_chan_names: bool
    :param seisan_chan_names:
        Whether channels are named like seisan channels (which are two
        letters rather than SEED convention of three) - defaults to True.
    :type ignore_length: bool
    :param ignore_length: See warning in dayproc.

    :return: Processed stream.
    :type: :class:`obspy.core.stream.Stream`
    """
    # Add sanity check
    if highcut and highcut >= 0.5 * samp_rate:
        raise IOError('Highcut must be lower than the nyquist')
    # Define the start-time
    if starttime:
        # Be nice and allow a datetime object.
        if isinstance(starttime, dt.date) or isinstance(starttime,
                                                        dt.datetime):
            starttime = UTCDateTime(starttime)
        day = starttime.date
    else:
        day = tr.stats.starttime.date
    if debug >= 2:
        print('Working on: ' + tr.stats.station + '.' + tr.stats.channel)
    if debug >= 5:
        tr.plot()
    # Do a brute force quality check
    qual = _check_daylong(tr)
    if not qual:
        msg = ("Data have more zeros than actual data, please check the raw"
               " data set-up and manually sort it: " + tr.stats.station +
               "." + tr.stats.channel)
        raise ValueError(msg)
    tr = tr.detrend('simple')    # Detrend data before filtering
    if debug > 0:
        print('I have ' + str(len(tr.data)) + ' data points for ' +
              tr.stats.station + '.' + tr.stats.channel +
              ' before processing')
    # Sanity check to ensure files are daylong
    if float(tr.stats.npts / tr.stats.sampling_rate) != length and clip:
        if debug >= 2:
            print('Data for ' + tr.stats.station + '.' + tr.stats.channel +
                  ' are not of daylong length, will zero pad')
        if tr.stats.endtime - tr.stats.starttime < 0.8 * length\
           and not ignore_length:
            msg = ('Data for %s.%s is %i hours long, which is less than '
                   '0.8 of the desired length, will not pad' %
                   (tr.stats.station, tr.stats.channel,
                    (tr.stats.endtime - tr.stats.starttime) / 3600))
            raise NotImplementedError(msg)
        # Use obspy's trim function with zero padding
        tr = tr.trim(starttime, starttime + length, pad=True, fill_value=0,
                     nearest_sample=True)
        # If there is one sample too many after this remove the first one
        # by convention
        if len(tr.data) == (length * tr.stats.sampling_rate) + 1:
            tr.data = tr.data[1:len(tr.data)]
        if not tr.stats.sampling_rate * length == tr.stats.npts:
            raise ValueError('Data are not daylong for ' +
                             tr.stats.station + '.' + tr.stats.channel)
        print('I now have %i data points after enforcing length'
              % len(tr.data))
    # Check sampling rate and resample
    if tr.stats.sampling_rate != samp_rate:
        if debug >= 2:
            print('Resampling')
        tr.resample(samp_rate)
    # Filtering section
    tr = tr.detrend('simple')    # Detrend data again before filtering
    if highcut and lowcut:
        if debug >= 2:
            print('Bandpassing')
        tr.data = bandpass(tr.data, lowcut, highcut,
                           tr.stats.sampling_rate, filt_order, True)
    elif highcut:
        if debug >= 2:
            print('Lowpassing')
        tr.data = lowpass(tr.data, highcut, tr.stats.sampling_rate,
                          filt_order, True)
    elif lowcut:
        if debug >= 2:
            print('Highpassing')
        tr.data = highpass(tr.data, lowcut, tr.stats.sampling_rate,
                           filt_order, True)
    else:
        warnings.warn('No filters applied')
    # Account for two letter channel names in s-files and therefore templates
    if seisan_chan_names:
        tr.stats.channel = tr.stats.channel[0] + tr.stats.channel[-1]
    # Sanity check the time header
    if tr.stats.starttime.day != day and clip:
        warnings.warn("Time headers do not match expected date: " +
                      str(tr.stats.starttime))
    # Sanity check to ensure files are daylong
    if float(tr.stats.npts / tr.stats.sampling_rate) != length and clip:
        if debug >= 2:
            print('Data for ' + tr.stats.station + '.' + tr.stats.channel +
                  ' is not of daylong length, will zero pad')
        # Use obspy's trim function with zero padding
        tr = tr.trim(starttime, starttime + length, pad=True, fill_value=0,
                     nearest_sample=True)
        # If there is one sample too many after this remove the last one
        # by convention
        if len(tr.data) == (length * tr.stats.sampling_rate) + 1:
            tr.data = tr.data[1:len(tr.data)]
        if not tr.stats.sampling_rate * length == tr.stats.npts:
            raise ValueError('Data are not daylong for ' +
                             tr.stats.station + '.' + tr.stats.channel)
    # Final visual check for debug
    if debug > 4:
        tr.plot()
    return tr

del data
logging.debug("%s.%s Merging Stream" % (station, comp))
stream.merge(fill_value=0)    # fills gaps with 0s and gives only one 'Trace'
logging.debug("%s.%s Slicing Stream to %s:%s" % (
    station, comp,
    utcdatetime.UTCDateTime(goal_day.replace('-', '')),
    utcdatetime.UTCDateTime(goal_day.replace('-', '')) + goal_duration -
    stream[0].stats.delta))
stream[0].trim(utcdatetime.UTCDateTime(goal_day.replace('-', '')),
               utcdatetime.UTCDateTime(goal_day.replace('-', '')) +
               goal_duration - stream[0].stats.delta,
               pad=True, fill_value=0.0)
trace = stream[0]
data = trace.data

freq = preprocess_lowpass
logging.debug("%s.%s Lowpass at %.2f Hz" % (station, comp, freq))
data = lowpass(trace.data, freq, trace.stats.sampling_rate, zerophase=True)

freq = preprocess_highpass
logging.debug("%s.%s Highpass at %.2f Hz" % (station, comp, freq))
data = highpass(data, freq, trace.stats.sampling_rate, zerophase=True)

samplerate = trace.stats['sampling_rate']
if samplerate != goal_sampling_rate:
    if resampling_method == "Resample":
        logging.debug("%s.%s Downsample to %.1f Hz" %
                      (station, comp, goal_sampling_rate))
        data = resample(data, goal_sampling_rate / trace.stats.sampling_rate,
                        'sinc_best')
    elif resampling_method == "Decimate":
        logging.debug("%s.%s Decimate by a factor of %i" %
                      (station, comp, decimation_factor))
        data = data[::decimation_factor]

year, month, day, hourf, minf, secf, wday, yday, isdst = \
    trace.stats.starttime.utctimetuple()

import numpy as np
from scipy.signal import resample
from obspy.signal.filter import bandpass, lowpass, highpass


def resampleFilterAndCutTraces(stream, resampling_rate, lowpass_value,
                               highpass_value, zerophase, corners, starttime,
                               endtime, message_function=None):
    """
    Resamples, filters and cuts all Traces in a Stream object.

    It will always apply each operation to every trace in the order described
    above.

    :param stream: obspy.core.stream object
        Will be altered and has to contain at least one Trace.
    :param resampling_rate: float
        Desired new sample rate.
    :param lowpass_value: float
        Corner frequency of the lowpass filter (upper end of the band).
    :param highpass_value: float
        Corner frequency of the highpass filter (lower end of the band).
    :param zerophase: bool
        Whether or not to use a zerophase filter.
    :param corners: int
        Number of corners for the used Butterworth-Filter.
    :param starttime: obspy.core.UTCDateTime
        New starttime of each Trace.
    :param endtime: obspy.core.UTCDateTime
        New endtime of each Trace.
    :param message_function: Python function
        If given, a string will be passed to this function to document the
        current progress.
    """
    # Convert to floats for more exact handling. Also level the data.
    for trace in stream:
        trace.data = np.require(trace.data, 'float32')
        trace.data -= np.linspace(trace.data[0], trace.data[-1],
                                  len(trace.data))
    # The first step is to resample the data. This is done before trimming
    # so that any boundary effects that might occur can be cut away later on.
    if resampling_rate != stream[0].stats.sampling_rate:
        time_range = stream[0].stats.endtime - stream[0].stats.starttime
        # Integer sample count, as required by scipy.signal.resample.
        new_npts = int(time_range * resampling_rate) + 1
        new_freq = 1.0 / (time_range / float(new_npts - 1))
        for _i, trace in enumerate(stream):
            if message_function:
                msg = 'Resampling traces to %.2f Hz [%i/%i]...' % \
                    (resampling_rate, _i + 1, len(stream))
                message_function(msg)
            # Use scipy to resample the traces.
            trace.data = resample(trace.data, new_npts, window='hamming')
            trace.stats.sampling_rate = new_freq
    # Filter the trace. Differentiate between low-, high-, and bandpass.
    if lowpass_value and highpass_value:
        if message_function:
            msg = 'Bandpass filtering traces from %.2f Hz to %.2f Hz...' % \
                (highpass_value, lowpass_value)
            message_function(msg)
        for trace in stream:
            trace.data = bandpass(trace.data, highpass_value, lowpass_value,
                                  trace.stats.sampling_rate, corners=corners,
                                  zerophase=zerophase)
    elif lowpass_value:
        if message_function:
            msg = 'Lowpass filtering traces with %.2f Hz...' % lowpass_value
            message_function(msg)
        for trace in stream:
            trace.data = lowpass(trace.data, lowpass_value,
                                 trace.stats.sampling_rate, corners=corners,
                                 zerophase=zerophase)
    elif highpass_value:
        if message_function:
            msg = 'Highpass filtering traces with %.2f Hz...' % highpass_value
            message_function(msg)
        for trace in stream:
            trace.data = highpass(trace.data, highpass_value,
                                  trace.stats.sampling_rate, corners=corners,
                                  zerophase=zerophase)
    # Trim the traces if necessary.
    if message_function:
        message_function('Trimming traces...')
    stream.trim(starttime, endtime)
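# Hypothetical usage of resampleFilterAndCutTraces(); the rates, band edges
# and window below are illustrative assumptions, not values from the source.
from obspy import read

st = read()                      # obspy's bundled example stream
resampleFilterAndCutTraces(st,
                           resampling_rate=50.0,
                           lowpass_value=10.0,
                           highpass_value=0.5,
                           zerophase=True,
                           corners=4,
                           starttime=st[0].stats.starttime + 5,
                           endtime=st[0].stats.starttime + 25,
                           message_function=print)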
def process(tr, lowcut, highcut, filt_order, samp_rate, debug,
            starttime=False, full_day=False):
    r"""Basic function to bandpass, downsample and check headers and length \
    of trace to ensure files start at the start of a day and are daylong.

    Works in place on data. This is employed to ensure all parts of the \
    data are processed in the same way.

    .. note:: Usually this function is called via dayproc or shortproc.

    :type tr: obspy.Trace
    :param tr: Trace to process
    :type highcut: float
    :param highcut: High cut in Hz, if set to None and lowcut is set, will \
        use a highpass filter.
    :type lowcut: float
    :param lowcut: Low cut in Hz, if set to None and highcut is set, will \
        use a lowpass filter.
    :type filt_order: int
    :param filt_order: Number of corners for filter.
    :type samp_rate: float
    :param samp_rate: Desired sampling rate in Hz
    :type debug: int
    :param debug: Debug output level from 0-5, higher numbers = more output
    :type starttime: obspy.UTCDateTime
    :param starttime: Desired start of trace
    :type full_day: bool
    :param full_day: Whether to expect, and enforce a full day of data or not.

    :return: obspy.Trace

    .. note:: Will convert channel names to two characters long.
    """
    import warnings
    from obspy.signal.filter import bandpass, lowpass, highpass
    # Add sanity check
    if highcut and highcut >= 0.5 * samp_rate:
        raise IOError('Highcut must be lower than the Nyquist')
    # Define the start-time
    if starttime:
        day = starttime.date
    else:
        day = tr.stats.starttime.date
    if debug >= 2:
        print('Working on: ' + tr.stats.station + '.' + tr.stats.channel)
    if debug >= 5:
        tr.plot()
    # Do a brute force quality check
    qual = _check_daylong(tr)
    if not qual:
        msg = ("Data have more zeros than actual data, please check the raw "
               "data set-up and manually sort it")
        raise ValueError(msg)
    tr = tr.detrend('simple')    # Detrend data before filtering
    # If there is one sample too many remove the first sample - this occurs
    # at station FOZ where the first sample is zero when it shouldn't be,
    # Not real sample: generated during data download
    # if full_day:
    #     if len(tr.data) == (86400 * tr.stats.sampling_rate) + 1:
    #         tr.data = tr.data[1:]
    if debug > 0:
        print('I have ' + str(len(tr.data)) + ' data points for ' +
              tr.stats.station + '.' + tr.stats.channel +
              ' before processing')
    # Sanity check to ensure files are daylong
    if float(tr.stats.npts / tr.stats.sampling_rate) != 86400.0\
            and full_day:
        if debug >= 2:
            print('Data for ' + tr.stats.station + '.' + tr.stats.channel +
                  ' is not of daylong length, will zero pad')
        # Work out when the trace thinks it is starting
        # traceday = UTCDateTime(str(tr.stats.starttime.year) + '-' +
        #                        str(tr.stats.starttime.month) + '-' +
        #                        str(tr.stats.starttime.day))
        # Use obspy's trim function with zero padding
        tr = tr.trim(starttime, starttime + 86400, pad=True, fill_value=0,
                     nearest_sample=True)
        # If there is one sample too many after this remove the first one
        # by convention
        if len(tr.data) == (86400 * tr.stats.sampling_rate) + 1:
            tr.data = tr.data[1:]
        if tr.stats.sampling_rate * 86400 != tr.stats.npts:
            raise ValueError('Data are not daylong for ' +
                             tr.stats.station + '.' + tr.stats.channel)
    print('I now have ' + str(len(tr.data)) +
          ' data points after enforcing day length')
    # Check sampling rate and resample
    if tr.stats.sampling_rate != samp_rate:
        if debug >= 2:
            print('Resampling')
        tr.resample(samp_rate)
    # Filtering section
    tr = tr.detrend('simple')    # Detrend data again before filtering
    if highcut and lowcut:
        if debug >= 2:
            print('Bandpassing')
        tr.data = bandpass(tr.data, lowcut, highcut,
                           tr.stats.sampling_rate, filt_order, True)
    elif highcut:
        if debug >= 2:
            print('Lowpassing')
        tr.data = lowpass(tr.data, highcut, tr.stats.sampling_rate,
                          filt_order, True)
    elif lowcut:
        if debug >= 2:
            print('Highpassing')
        tr.data = highpass(tr.data, lowcut, tr.stats.sampling_rate,
                           filt_order, True)
    else:
        warnings.warn('No filters applied')
    # Account for two letter channel names in s-files and therefore templates
    tr.stats.channel = tr.stats.channel[0] + tr.stats.channel[-1]
    # Sanity check the time header
    if tr.stats.starttime.day != day and full_day:
        warnings.warn("Time headers do not match expected date: " +
                      str(tr.stats.starttime))
    # Sanity check to ensure files are daylong
    if float(tr.stats.npts / tr.stats.sampling_rate) != 86400.0 and full_day:
        if debug >= 2:
            print('Data for ' + tr.stats.station + '.' + tr.stats.channel +
                  ' is not of daylong length, will zero pad')
        # Use obspy's trim function with zero padding
        tr = tr.trim(starttime, starttime + 86400, pad=True, fill_value=0,
                     nearest_sample=True)
        # If there is one sample too many after this remove the first one
        # by convention
        if len(tr.data) == (86400 * tr.stats.sampling_rate) + 1:
            tr.data = tr.data[1:]
        if tr.stats.sampling_rate * 86400 != tr.stats.npts:
            raise ValueError('Data are not daylong for ' +
                             tr.stats.station + '.' + tr.stats.channel)
    # Final visual check for debug
    if debug >= 4:
        tr.plot()
    return tr
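# Hypothetical call to process() above; the filter band and target rate are
# illustrative assumptions. Note that _check_daylong() and the obspy filter
# functions are expected to come from the surrounding package, as in the
# function body.
from obspy import read

tr = read()[0]
tr = process(tr, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=20.0,
             debug=0, starttime=False, full_day=False)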
trace = stream[0] data = trace.data freq = preprocess_lowpass logging.debug("%s.%s Lowpass at %.2f Hz" % (station, comp, freq)) data = lowpass(trace.data, freq, trace.stats.sampling_rate, zerophase=True) freq = preprocess_highpass logging.debug("%s.%s Highpass at %.2f Hz" % (station, comp, freq)) data = highpass(data, freq, trace.stats.sampling_rate, zerophase=True) samplerate = trace.stats['sampling_rate'] if samplerate != goal_sampling_rate: if resampling_method == "Resample": logging.debug("%s.%s Downsample to %.1f Hz" % (station, comp, goal_sampling_rate)) data = resample( data, goal_sampling_rate / trace.stats.sampling_rate, 'sinc_best') elif resampling_method == "Decimate": logging.debug("%s.%s Decimate by a factor of %i" % (station, comp, decimation_factor)) data = data[::decimation_factor]
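# Note on the resample() call above: its (data, ratio, converter) signature
# matches scikits.samplerate.resample, where ratio = output_rate / input_rate
# and 'sinc_best' selects the highest-quality sinc converter. A minimal
# sketch, assuming that package is installed:
import numpy as np
from scikits.samplerate import resample

fs_in, fs_out = 100.0, 20.0
x = np.random.randn(int(fs_in * 10))            # 10 s of synthetic data
y = resample(x, fs_out / fs_in, 'sinc_best')    # roughly 2000 samples out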