def test_correlate_template_different_amplitudes(self):
    """
    Check that correlations are the same independent of template amplitudes
    """
    data = np.random.randn(20000)
    template = data[1000:1200]
    template_large = template * 10e10
    template_small = template * 10e-10
    cc = correlate_template(data, template)
    cc_large = correlate_template(data, template_large)
    cc_small = correlate_template(data, template_small)
    np.testing.assert_allclose(cc, cc_large)
    np.testing.assert_allclose(cc, cc_small)
def test_correlate_template_zeros_in_input(self):
    template = np.zeros(10)
    data = read()[0].data[380:420]
    xcorr = correlate_template(data, template)
    np.testing.assert_equal(xcorr, np.zeros(len(xcorr)))
    template[:] = data[:10]
    data[5:20] = 0
    xcorr = correlate_template(data, template)
    np.testing.assert_equal(xcorr[5:11], np.zeros(6))
    data[:] = 0
    xcorr = correlate_template(data, template)
    np.testing.assert_equal(xcorr, np.zeros(len(xcorr)))
    xcorr = correlate_template(data, template, normalize='naive')
    np.testing.assert_equal(xcorr, np.zeros(len(xcorr)))
def test_correlate_template_versus_correlate(self):
    data = read()[0].data
    template = data[400:600]
    data = data[380:620]
    xcorr1 = correlate_template(data, template, normalize='naive')
    # shift of 20 = (len(data) - len(template)) // 2, so both functions
    # return 2 * 20 + 1 = 41 samples covering the same lags
    xcorr2 = correlate(data, template, 20)
    np.testing.assert_equal(xcorr1, xcorr2)
def test_integer_input_equals_float_input(self):
    a = [-3, 0, 4]
    b = [-3, 4]
    c = np.array(a, dtype=float)
    d = np.array(b, dtype=float)
    for demean in (True, False):
        for normalize in (None, 'naive'):
            cc1 = correlate(a, b, 3, demean=demean, normalize=normalize,
                            method='direct')
            cc2 = correlate(c, d, 3, demean=demean, normalize=normalize)
            np.testing.assert_allclose(cc1, cc2)
        for normalize in (None, 'naive', 'full'):
            cc3 = correlate_template(a, b, demean=demean,
                                     normalize=normalize, method='direct')
            cc4 = correlate_template(c, d, demean=demean,
                                     normalize=normalize)
            np.testing.assert_allclose(cc3, cc4)
def test_correlate_template_correct_alignment_of_normalization(self):
    data = read()[0].data
    template = data[400:600]
    data = data[380:620]
    # test for all combinations of odd and even length input data
    for i1, i2 in ((0, 0), (0, 1), (1, 1), (1, 0)):
        for mode in ('valid', 'same', 'full'):
            for demean in (True, False):
                xcorr = correlate_template(data[i1:], template[i2:],
                                           mode=mode, demean=demean)
                self.assertAlmostEqual(np.max(xcorr), 1)
def test_correlate_template_nodemean_fastmatchedfilter(self):
    """
    Compare non-demeaned result against FMF derived result.

    FMF result obtained by the following:

    import copy
    import numpy as np
    from fast_matched_filter import matched_filter
    from obspy import read

    data = read()[0].data
    template = copy.deepcopy(data[400:600])
    data = data[380:620]
    result = matched_filter(
        templates=template.reshape(1, 1, 1, len(template)),
        moveouts=np.array(0).reshape(1, 1, 1),
        weights=np.array(1).reshape(1, 1, 1),
        data=data.reshape(1, 1, len(data)), step=1, arch='cpu')[0]

    .. note::
        FastMatchedFilter doesn't use semver, but result generated by
        Calum Chamberlain on 18 Jan 2018 using up-to-date code, with the
        patch in https://github.com/beridel/fast_matched_filter/pull/12
    """
    result = [
        -1.48108244e-01, 4.71532270e-02, 1.82797655e-01, 1.92574233e-01,
        1.18700281e-01, 1.18958903e-02, -9.23405439e-02, -1.40047163e-01,
        -1.00863703e-01, -4.86961426e-03, 1.04124829e-01, 1.72662303e-01,
        1.41110823e-01, 1.53776666e-04, -1.71214968e-01, -2.83201426e-01,
        -3.04899812e-01, -2.03215942e-01, 8.88349637e-02, 5.00749528e-01,
        7.18140483e-01, 5.29728174e-01, 1.30591258e-01, -1.83402568e-01,
        -3.22406143e-01, -3.20676118e-01, -1.98054180e-01, -5.06028766e-04,
        1.56253457e-01, 1.74580097e-01, 6.49696961e-02, -8.56237561e-02,
        -1.89858019e-01, -1.96504310e-01, -1.04968190e-01, 2.51029599e-02,
        1.32686019e-01, 2.03692451e-01, 2.11983219e-01, 0.00000000e+00,
        0.00000000e+00]
    data = read()[0].data
    template = data[400:600]
    data = data[380:620]
    # FMF demeans template but does not locally demean data for
    # normalization
    template = template - template.mean()
    cc = correlate_template(data, template, demean=False)
    # FMF misses the last two elements?
    np.testing.assert_allclose(cc[0:-2], result[0:-2], atol=1e-7)
    shift, corr = xcorr_max(cc)
    self.assertEqual(shift, 0)
def test_correlate_template_eqcorrscan(self):
    """
    Test for moving window correlations with "full" normalisation.

    Comparison result is from EQcorrscan v.0.2.7, using the following:

    from eqcorrscan.utils.correlate import get_array_xcorr
    from obspy import read

    data = read()[0].data
    template = data[400:600]
    data = data[380:620]
    eqcorrscan_func = get_array_xcorr("fftw")
    result = eqcorrscan_func(
        stream=data, templates=template.reshape(1, len(template)),
        pads=[0])[0][0]
    """
    result = [
        -2.24548906e-01, 7.10350871e-02, 2.68642932e-01, 2.75941312e-01,
        1.66854098e-01, 1.66086946e-02, -1.29057273e-01, -1.96172655e-01,
        -1.41613603e-01, -6.83271606e-03, 1.45768464e-01, 2.42143899e-01,
        1.98310092e-01, 2.16377302e-04, -2.41576880e-01, -4.00586188e-01,
        -4.32240069e-01, -2.88735539e-01, 1.26461715e-01, 7.09268868e-01,
        9.99999940e-01, 7.22769439e-01, 1.75955653e-01, -2.46459037e-01,
        -4.34027880e-01, -4.32590246e-01, -2.67131507e-01, -6.78363896e-04,
        2.08171085e-01, 2.32197508e-01, 8.64804164e-02, -1.14158235e-01,
        -2.53621429e-01, -2.62945205e-01, -1.40505865e-01, 3.35594788e-02,
        1.77415669e-01, 2.72263527e-01, 2.81718552e-01, 1.38080209e-01,
        -1.27307668e-01]
    data = read()[0].data
    template = data[400:600]
    data = data[380:620]
    cc = correlate_template(data, template)
    np.testing.assert_allclose(cc, result, atol=1e-7)
    shift, corr = xcorr_max(cc)
    self.assertAlmostEqual(corr, 1.0)
    self.assertEqual(shift, 0)
def test_correlate_template_eqcorrscan_time(self):
    """
    Test full normalization for method='direct'.
    """
    result = [
        -2.24548906e-01, 7.10350871e-02, 2.68642932e-01, 2.75941312e-01,
        1.66854098e-01, 1.66086946e-02, -1.29057273e-01, -1.96172655e-01,
        -1.41613603e-01, -6.83271606e-03, 1.45768464e-01, 2.42143899e-01,
        1.98310092e-01, 2.16377302e-04, -2.41576880e-01, -4.00586188e-01,
        -4.32240069e-01, -2.88735539e-01, 1.26461715e-01, 7.09268868e-01,
        9.99999940e-01, 7.22769439e-01, 1.75955653e-01, -2.46459037e-01,
        -4.34027880e-01, -4.32590246e-01, -2.67131507e-01, -6.78363896e-04,
        2.08171085e-01, 2.32197508e-01, 8.64804164e-02, -1.14158235e-01,
        -2.53621429e-01, -2.62945205e-01, -1.40505865e-01, 3.35594788e-02,
        1.77415669e-01, 2.72263527e-01, 2.81718552e-01, 1.38080209e-01,
        -1.27307668e-01]
    data = read()[0].data
    template = data[400:600]
    data = data[380:620]
    cc = correlate_template(data, template, method='direct')
    np.testing.assert_allclose(cc, result, atol=1e-7)
    shift, corr = xcorr_max(cc)
    self.assertAlmostEqual(corr, 1.0)
    self.assertEqual(shift, 0)
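# The tests above pair correlate_template with xcorr_max. A minimal, hedged
# usage sketch (synthetic array, not taken from the test suite): the template
# is cut from the centre of the data, so the reported shift is 0 and the peak
# correlation is ~1.
import numpy as np
from obspy.signal.cross_correlation import correlate_template, xcorr_max

data = np.random.randn(1001)
template = data[450:551]                   # 101-sample template centred in the data
cc = correlate_template(data, template)    # 1001 - 101 + 1 = 901 normalized values
shift, value = xcorr_max(cc)               # shift is measured from the centre of cc
print(shift, value)                        # expected: 0, ~1.0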
def xcorr_cont(self, save_CCF=False, fmt=1):
    '''
    save_CCF: save individual CCFs in project_name/output/Template_match/CCF_records/
    fmt: output format
        fmt=1: #OriginTime meanCC stdCC nSTA templateIDX
        fmt=2: #OriginTime meanCC stdCC nSTA templateIDX mean_maxCCC std_maxCCC
    '''
    from obspy import UTCDateTime, read, Stream, Trace
    import glob
    from scipy import signal
    from obspy.signal.cross_correlation import correlate_template
    import matplotlib
    matplotlib.use('pdf')  # instead of using an interactive backend
    import matplotlib.pyplot as plt
    import os
    from repeq.data_proc import cal_CCC

    home = self.home
    project_name = self.project_name
    if self.ms is None:
        print('Run .template_load() first')
        return False
    else:
        # loop the templates (paths)
        for i_tmp in self.ms:
            print('----------------------------------------------')
            print('In template: %s' % (i_tmp))
            tmp_idx = int(i_tmp.split('/')[-1].split('_')[-1].split('.')[0])
            # if the detection exists and self.overwrite is False, skip
            if not self.overwrite:
                if os.path.exists(
                        home + '/' + project_name +
                        '/output/Template_match/Detections/Detected_tmp_%05d.npy' % (tmp_idx)) and os.path.exists(
                        home + '/' + project_name +
                        '/output/Template_match/Detections/Detected_tmp_%05d.txt' % (tmp_idx)):
                    print('Both %05d.npy and %05d.txt file exist, skip' % (tmp_idx, tmp_idx))
                    continue
            # if self.overwrite, or both .npy and .txt files do not exist
            OUT1 = open(
                home + '/' + project_name +
                '/output/Template_match/Detections/Detected_tmp_%05d.txt' % (tmp_idx), 'w')
            # output earthquake origin time
            if fmt == 1:
                OUT1.write('#OriginTime meanCC stdCC nSTA templateIDX\n')
            elif fmt == 2:
                # mean(max CCC for each station), so that the shift in each sta is negligible
                OUT1.write('#OriginTime meanCC stdCC nSTA templateIDX mean_maxCCC std_maxCCC\n')
            origintime = UTCDateTime(self.catalog.iloc[tmp_idx].Date + 'T' +
                                     self.catalog.iloc[tmp_idx].Time)
            st = read(i_tmp)  # read template in
            # load info data (mainly for P or S wave info)
            pick_info = np.load(home + '/' + project_name + '/waveforms_template/' +
                                'template_%05d.npy' % (tmp_idx), allow_pickle=True)
            pick_info = pick_info.item()
            # read all directories of daily data
            dayst_paths = glob.glob(home + '/' + project_name + '/waveforms/' + '*000000')
            dayst_paths.sort()
            sav_mean_sh_CCF = []     # save all the daily CCFs for later plotting
            sav_daily_nSTA = []      # number of stations for each daily CCF
            sav_alldays_eq_sta = {}  # detailed info (CC, CCC, shifts) for every station for all days searched with this template
            # loop the daily data
            for dayst_path in dayst_paths:
                # initialize for saving
                sav_NET = []
                sav_STA = []
                sav_CHN = []
                sav_LOC = []
                sav_phase = []
                sav_CCF = []
                sav_travel_npts = []
                sav_travel_time = []
                sav_continuousdata = []
                sav_template = []
                YMD = dayst_path.split('/')[-1][:8]
                print(' --Reading daily data: %s' % (dayst_path))
                try:
                    i_dayst = read(dayst_path + '/waveforms/merged.ms')  # load daily data
                except Exception:
                    # file size larger than 2 GB
                    i_dayst = data_proc.read_obspy(dayst_path + '/waveforms/merged.ms')
                # print(i_dayst.__str__(extended=True))
                for i in range(len(st)):
                    # ----- loop individual pick/station/comp of template -----
                    NET = st[i].stats.network
                    STA = st[i].stats.station
                    CHN = st[i].stats.channel
                    LOC = st[i].stats.location
                    # in daily data, search for the same station, channel, comp, sampling rate...
                    # that matches the i-th pick in this particular template
                    tmp_dayst = i_dayst.select(network=NET, station=STA,
                                               sampling_rate=st[i].stats.sampling_rate,
                                               channel=CHN, location=LOC)
                    tmp_dayst = tmp_dayst.copy()
                    if len(tmp_dayst) != 1:
                        if len(tmp_dayst) == 0:
                            # print('Case1. No data found:%s, skip this station' % (STA + '.' + CHN))
                            pass
                        else:
                            # print('Case2. Multiple data found:%d, probably broken tcs, skip this station' % (len(tmp_dayst)))
                            # print(tmp_dayst)
                            # tmp_dayst should be only one
                            pass
                        continue
                    else:
                        # find the station travel time
                        # if len(phases[phases.Channel.str.startswith(regional.upper()+'.'+STA+'.'+CHN)]) == 0:
                        #     continue  # cannot find station shift
                        # get travel time implied by the template header [sec] (assume data request always correct)
                        travel_time = st[i].stats.starttime + self.tcs_length[0] - origintime
                        travel_npts = int(np.round(travel_time * self.sampling_rate))  # travel time in npts
                        # get data values for template and continuous (daily) data
                        template = np.nan_to_num(st[i].data)
                        continuousdata = np.nan_to_num(tmp_dayst[0].data)
                        # run xcorr
                        CCF = correlate_template(continuousdata, template)
                        CCF = np.nan_to_num(CCF)
                        # load info data outside the loop
                        # pick_info = np.load(home+'/'+project_name+'/waveforms_template/'+'template_%05d.npy'%(tmp_idx), allow_pickle=True)
                        # pick_info = pick_info.item()
                        # save for later checking
                        sav_NET.append(NET)
                        sav_STA.append(STA)
                        sav_CHN.append(CHN)
                        sav_LOC.append(LOC)
                        # Update 2020.11.12: order of .ms and pick_info.npy should now be the same
                        # Double check to see if the starttime matches the pick_info
                        assert np.abs(
                            (UTCDateTime(pick_info['arrival'][i]) - pick_info['tcs_length'][0]) -
                            st[i].stats.starttime) < 0.02, 'pick_info and ms starttime does NOT match!'
                        # P or S phase. Caution! previously wrong because the i-th index in st
                        # is not the i-th index in pick_info
                        sav_phase.append(pick_info['phase'][i])
                        # debug
                        # print('appending info:', NET+'.'+STA+'.'+CHN+'.'+LOC, PS)
                        sav_travel_time.append(travel_time)
                        sav_travel_npts.append(travel_npts)
                        sav_CCF.append(CCF)
                        sav_continuousdata.append(continuousdata)
                        sav_template.append(template)
                if len(sav_CCF) < self.filt_nSTA:
                    print('    Number of CCF: %d, not enough for threshold' % (len(sav_CCF)))
                    continue  # not enough data available, continue to next daily data
                # ---------- dealing with shifting of each CCF ----------
                # travel_npts = np.array(travel_npts)
                sav_travel_npts = np.array(sav_travel_npts)  # fix typo 2020.12.14
                sav_travel_time = np.array(sav_travel_time)
                sh_sav_CCF = np.array(sav_CCF)  # copy the original CCF
                # shift CCF based on the template arrival
                for ii in range(len(sh_sav_CCF)):
                    sh_sav_CCF[ii] = np.roll(sav_CCF[ii], -int(sav_travel_npts[ii]))
                print('    Number of CCF: %d, continue searching earthquakes' % (len(sav_CCF)))
                mean_sh_CCF = np.mean(sh_sav_CCF, axis=0)  # stack/mean all the CCFs
                std_sh_CCF = np.std(sh_sav_CCF, axis=0)    # also calculate std
                # save the individual CCFs (for debug purposes only)
                # Update 2020.12.14: save all the info in .npy instead of an obspy Stream (to also save shift info)
                if save_CCF:
                    # raw CCF (unshifted)
                    ST = Stream()
                    for ii, iCCF in enumerate(sav_CCF):
                        tmpCCF = Trace(iCCF)
                        tmpCCF.stats.sampling_rate = i_dayst[0].stats.sampling_rate
                        tmpCCF.stats.starttime = i_dayst[0].stats.starttime
                        tmpCCF.stats.network = sav_NET[ii]
                        tmpCCF.stats.station = sav_STA[ii]
                        tmpCCF.stats.channel = sav_CHN[ii]
                        tmpCCF.stats.location = sav_LOC[ii]
                        ST += tmpCCF
                    # create dict to save info
                    sav_CCF_info = {}
                    sav_CCF_info['CCF_raw'] = ST
                    sav_CCF_info['shift_npts'] = sav_travel_npts
                    sav_CCF_info['shift_time'] = sav_travel_time
                    sav_CCF_info['OT_template'] = origintime
                    np.save(home + '/' + project_name + '/output/Template_match/CCF_records/' +
                            'CCF_template_%05d_daily_%s.npy' % (tmp_idx, YMD), sav_CCF_info)
                '''
                # ST = Stream()
                for ii, iCCF in enumerate(sh_sav_CCF):
                    tmpCCF = Trace(iCCF)
                    tmpCCF.stats.sampling_rate = i_dayst[0].stats.sampling_rate
                    tmpCCF.stats.starttime = i_dayst[0].stats.starttime
                    tmpCCF.stats.network = sav_NET[ii]
                    tmpCCF.stats.station = sav_STA[ii]
                    tmpCCF.stats.channel = sav_CHN[ii]
                    tmpCCF.stats.location = sav_LOC[ii]
                    ST += tmpCCF
                ST.write(home+'/'+project_name+'/output/Template_match/CCF_records/'+'shftCCF_template_%05d_daily_%s.ms'%(tmp_idx,YMD), format="MSEED")
                '''
                # ---------- Find earthquakes by the mean CCF ----------
                time = i_dayst[0].times()
                eq_idx = np.where(mean_sh_CCF >= self.filt_CC)[0]  # filter #1
                # mean_sh_CCF has length = len(dailydata) - len(template) + 1
                # remove indices that are too close to the right edge (filter #2);
                # -1 converts length to index; max(shift) makes sure none of the templates touch the right bound
                _idx = np.where(eq_idx < len(mean_sh_CCF) - 1 - np.max(sav_travel_npts))[0]
                eq_idx = eq_idx[_idx]
                sav_eq_sta = {}  # save the detailed result (lag info, CCC value) for later use
                for neqid in eq_idx:
                    # new_dayst[0].stats.starttime + time[np.argmax(mean_sh_CCF)]  # find itself
                    # origin time of this detection
                    detected_OT = i_dayst[0].stats.starttime + time[neqid] + self.tcs_length[0]
                    detected_OT_str = detected_OT.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-4]  # accuracy to 0.01 sec
                    # found an earthquake; this is the arrival for template.st
                    print('    New event found:',
                          i_dayst[0].stats.starttime + time[neqid] + self.tcs_length[0])
                    if fmt == 1:
                        OUT1.write('%s %.3f %.3f %d %s\n' % (detected_OT_str, mean_sh_CCF[neqid],
                                                             std_sh_CCF[neqid], len(sav_STA),
                                                             'template_%05d' % (tmp_idx)))
                    elif fmt == 2:
                        # calculate CCC for individual stations
                        sav_maxCCC = []
                        # sav_sh_sec = []
                        for n in range(len(sav_template)):
                            # loop over every station
                            # print('writing info:', sav_NET[n]+'.'+sav_STA[n]+'.'+sav_CHN[n]+'.'+sav_LOC[n], sav_phase[n])
                            cut_daily = sav_continuousdata[n][
                                neqid + sav_travel_npts[n]:
                                neqid + sav_travel_npts[n] + len(sav_template[n])]
                            maxCCC, lag = cal_CCC(sav_template[n], cut_daily)
                            if np.isnan(maxCCC):
                                maxCCC = 0  # probably due to cross-correlating a zero array
                            midd = (len(cut_daily)) - 1  # length of b?? at this idx, refdata aligns with target data
                            sh_sec = (lag - midd) * (1.0 / self.sampling_rate)  # convert to seconds (dt correction of P)
                            sav_maxCCC.append(maxCCC)
                            if detected_OT_str in sav_eq_sta:
                                sav_eq_sta[detected_OT_str]['net_sta_comp'].append(
                                    sav_NET[n] + '.' + sav_STA[n] + '.' + sav_CHN[n] + '.' + sav_LOC[n])
                                sav_eq_sta[detected_OT_str]['phase'].append(sav_phase[n])
                                sav_eq_sta[detected_OT_str]['CCC'].append(maxCCC)
                                sav_eq_sta[detected_OT_str]['CC'].append(sh_sav_CCF[n][neqid])
                                sav_eq_sta[detected_OT_str]['shift'].append(sh_sec)
                            else:
                                # initialize dictionary
                                sav_eq_sta[detected_OT_str] = {}
                                sav_eq_sta[detected_OT_str]['net_sta_comp'] = [
                                    sav_NET[n] + '.' + sav_STA[n] + '.' + sav_CHN[n] + '.' + sav_LOC[n]]
                                sav_eq_sta[detected_OT_str]['phase'] = [sav_phase[n]]
                                sav_eq_sta[detected_OT_str]['CCC'] = [maxCCC]
                                sav_eq_sta[detected_OT_str]['CC'] = [sh_sav_CCF[n][neqid]]
                                sav_eq_sta[detected_OT_str]['shift'] = [sh_sec]
                            # sav_sh_sec.append(sh_sec)
                        OUT1.write('%s %.3f %.3f %d %s %.3f %.3f\n' % (detected_OT_str, mean_sh_CCF[neqid],
                                                                       std_sh_CCF[neqid], len(sav_STA),
                                                                       'template_%05d' % (tmp_idx),
                                                                       np.mean(sav_maxCCC), np.std(sav_maxCCC)))
                # ----- Only for checking: plot the one with the largest CC value and check
                # (finds itself if the template and daily data are from the same day) -----
                if self.plot_check:
                    tmp_T = st[0].times()
                    for i_eqidx, neqid in enumerate(eq_idx):
                        # loop over detections
                        detected_OT = i_dayst[0].stats.starttime + time[neqid] + self.tcs_length[0]  # origin time of this detection
                        detected_OT_str = detected_OT.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-4]  # accuracy to 0.01 sec
                        plt.figure(1)
                        for n in range(len(sav_template)):
                            # loop over every station
                            # cut_daily = sav_continuousdata[n][np.argmax(mean_sh_CCF)+sav_travel_npts[n]:np.argmax(mean_sh_CCF)+sav_travel_npts[n]+len(sav_template[n])]  # old version, only plot maximum
                            cut_daily = sav_continuousdata[n][
                                neqid + sav_travel_npts[n]:
                                neqid + sav_travel_npts[n] + len(sav_template[n])]
                            cut_daily = cut_daily / np.max(np.abs(cut_daily))
                            plt.plot(tmp_T, cut_daily + n, 'k', linewidth=2)  # time series cut from daily data
                            plt.plot(tmp_T, sav_template[n] / np.max(np.abs(sav_template[n])) + n,
                                     'r', linewidth=1.2)  # template data
                            plt.text(tmp_T[-1], n, sav_STA[n] + '.' + sav_CHN[n])
                            # --- add individual CC value and max_CCC value ---
                            if fmt == 1:
                                # maxCCC, lag = cal_CCC(sav_template[n], cut_daily)
                                # midd = (len(cut_daily))-1  # length of b?? at this idx, refdata aligns with target data
                                # sh_sec = (lag-midd)*(1.0/self.sampling_rate)  # convert to seconds (dt correction of P)
                                plt.text(tmp_T[-1] * 0.05, n, 'CC=%.2f' % (sh_sav_CCF[n][neqid]))
                            elif fmt == 2:
                                maxCCC = sav_eq_sta[detected_OT_str]['CCC'][n]
                                sh_sec = sav_eq_sta[detected_OT_str]['shift'][n]
                                plt.text(tmp_T[-1] * 0.05, n,
                                         'CC=%.2f,max_CCC=%.2f,dt=%.3f' % (sh_sav_CCF[n][neqid], maxCCC, sh_sec))
                            # Future improvement: if fmt==2, the values have already been calculated, just reuse them
                            # if fmt == 1:
                            # elif fmt == 2:
                        # plt.title('Time:%s CC=%5.2f'%((i_dayst[0].stats.starttime+time[neqid]+self.tcs_length[0]).strftime('%H:%M:%S'), np.max(mean_sh_CCF)))
                        plt.title('Time:%s CC=%5.2f' % ((i_dayst[0].stats.starttime + time[neqid] +
                                                         self.tcs_length[0]).strftime('%H:%M:%S.%f'),
                                                        mean_sh_CCF[neqid]))
                        plt.savefig(home + '/' + project_name + '/output/Template_match/Figs/' +
                                    'template_%05d_daily_%s_%03d.png' % (tmp_idx, YMD, i_eqidx))
                        plt.close()
                        if i_eqidx > 99:
                            break  # don't plot if more than 99 plots in the same day
                sav_mean_sh_CCF.append(mean_sh_CCF)
                sav_daily_nSTA.append(len(sav_CCF))
                sav_alldays_eq_sta.update(sav_eq_sta)  # not supported for fmt=1
            # ------ output detailed data (lag information for each station) in .npy ------
            # only if fmt=2; fmt=1 didn't calculate the CCC
            if fmt == 2:
                np.save(home + '/' + project_name + '/output/Template_match/Detections/' +
                        'Detected_tmp_%05d.npy' % (tmp_idx), sav_alldays_eq_sta)
            # ---- plot the mean_shifted_CCF for all days ----
            plt.figure(1)
            for n in range(len(sav_mean_sh_CCF)):
                plt.plot(sav_mean_sh_CCF[n] + n, linewidth=1)
                if n == 0:
                    plt.text(len(sav_mean_sh_CCF[n]), n, 'N=%d' % (sav_daily_nSTA[n]))  # number of stations
                else:
                    plt.text(len(sav_mean_sh_CCF[n]), n, '%d' % (sav_daily_nSTA[n]))  # number of stations
            plt.title('Mean CCF (template_%05d)' % (tmp_idx), fontsize=16)
            plt.ylabel('Days after %s' % (dayst_paths[0].split('/')[-1][:8]), fontsize=16)
            plt.savefig(home + '/' + project_name + '/output/Template_match/Figs/' +
                        'MeanCCF_%05d.png' % (tmp_idx))
            plt.close()
            OUT1.close()
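# The core of xcorr_cont above is: correlate the template against continuous
# data station by station, roll each CCF back by that station's travel time
# (in samples) so all CCFs align on origin time, stack them, and threshold the
# stacked trace. A minimal, hedged sketch of that idea with synthetic arrays
# and a hypothetical threshold (none of the project's actual I/O):
import numpy as np
from obspy.signal.cross_correlation import correlate_template

filt_CC = 0.5                                               # hypothetical detection threshold
travel_npts = [0, 120, 340]                                 # template start minus origin time, in samples
origin_sample = 5000
continuous = [np.random.randn(20000) for _ in travel_npts]  # synthetic continuous records
templates = [c[origin_sample + npts:origin_sample + npts + 500]
             for c, npts in zip(continuous, travel_npts)]   # synthetic template windows

ccfs = []
for cont, tmpl, npts in zip(continuous, templates, travel_npts):
    ccf = np.nan_to_num(correlate_template(cont, tmpl))     # normalized CCF, 'valid' mode
    ccfs.append(np.roll(ccf, -npts))                        # align on origin time
mean_ccf = np.mean(ccfs, axis=0)                            # stack across stations
detections = np.where(mean_ccf >= filt_CC)[0]               # candidate origin-time samples
print(len(detections), 'candidate detections')              # should include origin_sample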
# set master event for correlation
masterEvent = waveforms[11]
# masterEvent = waveforms[122]

# open file for output
outFile = h5py.File(path + type + "_correlations.h5", "w")

# make some arrays for storing output
shifts = np.zeros((len(waveforms)))
corrCoefs = np.zeros((len(waveforms)))

for i in range(len(waveforms)):
    # correlate master event and waveform i
    corr = correlate_template(masterEvent, waveforms[i])
    shift, corrCoef = xcorr_max(corr)

    # save output
    shifts[i] = shift
    corrCoefs[i] = corrCoef

    # give the user some output
    print("Correlated master event with " +
          str(round(i / len(waveforms) * 100)) + "% of events")

# write output to file
outFile.create_dataset("corrCoefs", data=corrCoefs)
outFile.create_dataset("shifts", data=shifts)

# close output file
outFile.close()
# module-level imports assumed by this function (not shown in the original excerpt)
import copy
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy.io import loadmat
from obspy.signal import cross_correlation


def lin_corr(patient_id: str, time_begin: list, duration: float, t_lag=0.7, critical_corr=0.7):
    """
    :param patient_id:
    :param time_begin: List with [hour, minute].
    :param duration: In seconds.
    :param t_lag: In seconds.
    :param critical_corr:
    :return:
    """

    # Load and prepare data
    data_mat = loadmat('../data/' + patient_id + '_' + str(time_begin[0]) + 'h.mat')
    info_mat = loadmat('../data/' + patient_id + '_info.mat')
    fs = float(info_mat['fs'])
    sample_begin = int(time_begin[1] * 60 * fs)
    sample_end = sample_begin + int(duration * fs)
    data_raw = data_mat['EEG'][:, sample_begin:sample_end].transpose()
    n_lag = int(t_lag * fs)
    factor = np.exp(-1)

    # Compute normalized cross correlation (NCC)
    cctl = np.zeros((data_raw.shape[1], data_raw.shape[1], (n_lag * 2) + 1))
    for from_ in range(data_raw.shape[1]):
        for to_ in range(data_raw.shape[1]):
            x = data_raw[:, to_]
            y = data_raw[n_lag:-n_lag, from_]
            cctl[from_, to_, :] = cross_correlation.correlate_template(x, y)

    # Calculate peak cross correlation (cc) and corresponding time lag (tl)
    sign = np.sign(np.max(cctl, axis=2) - np.abs(np.min(cctl, axis=2)))
    cc = np.multiply(np.max(np.abs(cctl), axis=2), sign)
    mask = np.where(np.abs(cc) > critical_corr, 1, np.nan)
    tl_n = np.argmax(np.abs(cctl), axis=2)
    tl = (tl_n - n_lag) * mask / fs * 1000  # in [ms]
    tl_no_mask = (tl_n - n_lag) / fs * 1000  # in [ms], used for plots

    # Calculate mean tau
    # Tile and stack values for future operations
    tl_n_stacked = np.dstack([tl_n] * cctl.shape[2])
    arg_tau_stacked = factor * np.dstack([cc] * cctl.shape[2])
    mask_stacked = np.dstack([np.where(np.abs(cc) > critical_corr, 1, 0)] * cctl.shape[2])
    t_indices_tiled = np.tile(np.arange(0, cctl.shape[2]), (cctl.shape[0], cctl.shape[0], 1))

    # Get indices of values close to factor of peak cross correlation
    close_indices = np.isclose(cctl, arg_tau_stacked, rtol=1e-1) * t_indices_tiled

    # Create mask to separate negative and positive tau
    higher_tau_mask = np.where(close_indices - tl_n_stacked > 0, 1, 0)
    lower_tau_mask = np.where((tl_n_stacked - close_indices > 0) & (close_indices != 0), 1, 0)

    # Eliminate possible third occurrence of np.isclose() to factor
    higher_edge_indices = np.where(np.diff(higher_tau_mask) == -1, 1, 0) * t_indices_tiled[:, :, :-1]
    higher_edge_indices = np.min(np.where(higher_edge_indices == 0, np.inf, higher_edge_indices), axis=2)
    higher_third_occ_mask = np.where(t_indices_tiled > np.dstack([higher_edge_indices] * cctl.shape[2]), 0, 1)
    lower_edge_indices = np.where(np.diff(lower_tau_mask) == 1, 1, 0) * t_indices_tiled[:, :, :-1]
    lower_edge_indices = np.max(lower_edge_indices, axis=2)
    lower_third_occ_mask = np.where(t_indices_tiled < np.dstack([lower_edge_indices] * cctl.shape[2]), 0, 1)

    # Apply masks (apply mask for critical correlation separately to get all taus for plots)
    higher_tau_masked_all = close_indices * higher_tau_mask * higher_third_occ_mask
    higher_tau_masked = higher_tau_masked_all * mask_stacked
    lower_tau_masked_all = close_indices * lower_tau_mask * lower_third_occ_mask
    lower_tau_masked = lower_tau_masked_all * mask_stacked

    # Compute median along time lag axis and ignore zero entries
    higher_tau = np.ma.median(np.ma.masked_where(higher_tau_masked == 0, higher_tau_masked), axis=2).filled(0)
    lower_tau = np.ma.median(np.ma.masked_where(lower_tau_masked == 0, lower_tau_masked), axis=2).filled(0)

    # Get taus without mask for critical correlation for plots
    higher_tau_all = np.ma.median(np.ma.masked_where(higher_tau_masked_all == 0,
                                                     higher_tau_masked_all), axis=2).filled(0)
    higher_tau_all = (higher_tau_all - n_lag) / fs * 1000  # in [ms]
    lower_tau_all = np.ma.median(np.ma.masked_where(lower_tau_masked_all == 0,
                                                    lower_tau_masked_all), axis=2).filled(0)
    lower_tau_all = (lower_tau_all - n_lag) / fs * 1000  # in [ms]

    # Calculate mean distance for tau to cc
    tau_n = (higher_tau - lower_tau) / 2
    tau = np.where(tau_n == 0, np.nan, tau_n) / fs * 1000  # in [ms]

    # Additional masks for plots (diagonal, upper triangle, ...)
    tl_masked = tl.copy()
    cc_masked = cc.copy()
    np.fill_diagonal(tl_masked, np.nan)
    np.fill_diagonal(cc_masked, np.nan)
    cc_masked[np.triu_indices(cc_masked.shape[0], k=1)] = np.nan

    # Plot cc, tl and tau
    # General settings
    sns.set_style('white')
    fig = plt.figure(figsize=(10, 13))
    gs = fig.add_gridspec(3, 2)
    cmap_div = copy.copy(mpl.cm.get_cmap('seismic'))
    cmap_div.set_bad('dimgrey')
    cmap_uni = copy.copy(mpl.cm.get_cmap('viridis'))
    cmap_uni.set_bad('dimgrey')

    # Subplot: Peak cross correlation
    ax0 = fig.add_subplot(gs[:1, :1])
    sns.heatmap(cc_masked, cmap=cmap_div, vmin=-1, vmax=1)
    ax0.set_title('Peak cross correlation')
    ax0.set_xlabel('Node idx'), ax0.set_ylabel('Node idx')

    # Subplot: Histogram of peak cross correlation
    ax1 = fig.add_subplot(gs[:1, 1:])
    sns.distplot(cc_masked, kde=False)
    ymin, ymax = ax1.get_ylim()
    xmin, xmax = ax1.get_xlim()
    label = 'Critical corr. = +/- ' + str(critical_corr)
    plt.plot([critical_corr, critical_corr], [ymin, ymax], linestyle='--', color='black', label=label)
    plt.plot([-critical_corr, -critical_corr], [ymin, ymax], linestyle='--', color='black')
    ax1.set_xlim(xmin, xmax), ax1.set_ylim(ymin, ymax)
    plt.legend()
    ax1.set_title('Peak cross correlation histogram')
    ax1.set_xlabel('Peak cross correlation [-]'), ax1.set_ylabel('Nr. of occurrence [-]')

    # Subplot: Time lag
    ax2 = fig.add_subplot(gs[1:2, :1])
    vlim = np.nanmax(np.abs(tl))
    sns.heatmap(tl_masked, cmap=cmap_div, vmin=-vlim, vmax=vlim)
    ax2.set_title('Corresponding time lag [ms]')
    ax2.set_xlabel('Node idx'), ax2.set_ylabel('Node idx')

    # Subplot: Histogram of time lag
    ax3 = fig.add_subplot(gs[1:2, 1:])
    sns.distplot(tl_masked, kde=False)
    ax3.set_title('Time lag histogram')
    ax3.set_xlabel('Time [ms]'), ax3.set_ylabel('Nr. of occurrence [-]')

    # Subplot: Tau
    ax4 = fig.add_subplot(gs[2:, :1])
    sns.heatmap(tau, cmap=cmap_uni)
    ax4.set_title('Corresponding tau [ms]')
    ax4.set_xlabel('Node idx'), ax4.set_ylabel('Node idx')

    # Subplot: Histogram of tau
    ax5 = fig.add_subplot(gs[2:, 1:])
    sns.distplot(np.diagonal(tau), kde=False, label='Auto correlated')
    auto_corr = tau.copy()
    auto_corr[np.diag_indices(auto_corr.shape[0])] = np.nan
    sns.distplot(auto_corr, kde=False, label='Cross correlated')
    ax5.set_title('Tau histogram'), plt.legend()
    ax5.set_xlabel('Time [ms]'), ax5.set_ylabel('Nr. of occurrence [-]')

    plt.tight_layout()
    save_name = patient_id + '_' + str(time_begin[0]) + 'h' + str(time_begin[1]) + 'm'
    plt.savefig('../doc/figures/cc_' + save_name + '.png')
    plt.close()

    # t vector for plots
    plt.figure(figsize=(8, 5))
    t = np.arange(0, cctl.shape[2])
    t = (t - n_lag) / fs * 1000

    n0 = 44  # Base node
    begin_N = 7
    end_N = 14  # Number of line plots
    indices = [i for i in range(begin_N, end_N)]
    n_choices = 75
    valid_choices = np.argwhere(~np.isnan(mask))
    valid_indices = [i[0] * cctl.shape[0] + i[1] for i in valid_choices]
    # indices = np.random.choice(cctl.shape[0] * cctl.shape[1], n_choices, replace=False).tolist()
    indices = valid_indices  # np.random.choice(valid_indices, n_choices, replace=False).tolist()

    peaks_x, peaks_y, taus_x_0, taus_x_1, taus_y = [], [], [], [], []
    for i in indices:
        n0 = i % cctl.shape[0]
        n1 = int(math.floor(i / cctl.shape[1]))
        # n1 = n0 + i  # Reference node
        plt.plot(t, cctl[n0, n1, :], label='Nodes ' + str(n0) + ' - ' + str(n1))
        peaks_x.append(tl_no_mask[n0, n1])
        peaks_y.append(cc[n0, n1])
        taus_x_0.append(higher_tau_all[n0, n1])
        taus_x_1.append(lower_tau_all[n0, n1])
        taus_y.append(cc[n0, n1] * factor)

    plt.scatter(peaks_x, peaks_y, color='black', marker='d', label='Peak', zorder=len(indices) + 1)
    plt.scatter(taus_x_0, taus_y, color='black', marker='<', label='Right tau', zorder=len(indices) + 1)
    plt.scatter(taus_x_1, taus_y, color='black', marker='>', label='Left tau', zorder=len(indices) + 1)
    ymin, ymax = plt.gca().get_ylim()
    plt.plot([-t_lag * 1000, t_lag * 1000], [critical_corr, critical_corr],
             color='black', linestyle=':', label='Critical corr.')
    plt.plot([-t_lag * 1000, t_lag * 1000], [-critical_corr, -critical_corr],
             color='black', linestyle=':')
    plt.ylim(ymin, ymax)
    plt.xlabel('Time lag [ms]'), plt.ylabel('NCC [-]')
    plt.title('Normalized cross correlation: examples'), plt.legend(loc='upper right')
    plt.xlim(-t_lag * 1000, t_lag * 1000), plt.grid()
    save_name = patient_id + '_' + str(time_begin[0]) + 'h' + str(time_begin[1]) + 'm'
    plt.savefig('../doc/figures/cctl_' + save_name + '.png')
    plt.close()
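# For a single channel pair, the cctl computation above reduces to: correlate
# the full-length signal x against a window of y trimmed by n_lag samples on
# both sides, giving 2 * n_lag + 1 correlation values (one per lag). A minimal,
# hedged sketch with synthetic signals (fs and the expected lag value are
# assumptions for illustration, not taken from the original data):
import numpy as np
from obspy.signal import cross_correlation

fs = 512.0                        # assumed sampling rate [Hz]
n_lag = int(0.7 * fs)             # +/- 0.7 s lag window, as in lin_corr
x = np.random.randn(10 * int(fs))
y = np.roll(x, 50)                # y is x delayed by 50 samples (synthetic)

ncc = cross_correlation.correlate_template(x, y[n_lag:-n_lag])  # 2 * n_lag + 1 values
peak_cc = np.max(np.abs(ncc))                                   # peak cross correlation (~1 here)
time_lag_ms = (np.argmax(np.abs(ncc)) - n_lag) / fs * 1000      # lag in [ms]; ~ -50 / fs * 1000 for this setup
print(peak_cc, time_lag_ms)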