Пример #1
0
def read_multiplet(filename,
                   idx,
                   return_tp=False,
                   db_path=cfg.dbpath,
                   db_path_T='template_db_1/',
                   db_path_M='matched_filter_1/'):
    """
    Read one detection (multiplet) from the matched-filter database.

    The metadata are read from ``db_path + db_path_M + filename + 'meta.h5'``
    and the waveforms from ``db_path + db_path_M + filename + 'wav.h5'``.
    The template named ``'template<id>'``, where ``<id>`` is the first entry
    of the ``template_id`` dataset of the metadata file, is read with
    ``read_template`` from ``db_path + db_path_T``.

    Parameters
    ----------
    filename : str
        Common prefix of the ``meta.h5`` / ``wav.h5`` file pair.
    idx : int
        Index of the detection: first axis of the ``waveforms`` dataset and
        position in ``origin_times`` / ``correlation_coefficients``.
    return_tp : bool, optional
        If True, also return the template object.
    db_path : str, optional
        Root path of the database.
    db_path_T : str, optional
        Sub-directory holding the templates.
    db_path_M : str, optional
        Sub-directory holding the matched-filter output.

    Returns
    -------
    S : Stream
        One Trace per (station, component) pair, in station-major order.
        Each trace carries the station, channel, sampling rate
        (``cfg.sampling_rate``) and start time (the detection origin time).
        The stream also gets the attributes ``s_moveouts``, ``p_moveouts``,
        ``source_idx``, ``template_ID``, ``latitude``, ``longitude``,
        ``depth`` (copied from the template metadata), ``corr``,
        ``stations`` and ``components``.
    T : template object
        Only returned when ``return_tp`` is True.
    """
    prefix = db_path + db_path_M + filename
    # Context managers guarantee the HDF5 handles are released even if a
    # dataset is missing or idx is out of range.
    with h5.File(prefix + 'meta.h5', 'r') as fm:
        template_id = fm['template_id'][0]
        stations = fm['stations'][:].astype('U')
        components = fm['components'][:].astype('U')
        date = udt(fm['origin_times'][idx])
        corr = fm['correlation_coefficients'][idx]
    T = read_template('template{:d}'.format(template_id),
                      db_path=db_path + db_path_T)
    with h5.File(prefix + 'wav.h5', 'r') as fw:
        waveforms = fw['waveforms'][idx, :, :, :]
    #---------------------------------
    S = Stream()
    for s, station in enumerate(stations):
        for c, component in enumerate(components):
            trace = Trace(data=waveforms[s, c, :])
            trace.stats['station'] = station
            trace.stats['channel'] = component
            trace.stats['sampling_rate'] = cfg.sampling_rate
            trace.stats.starttime = date
            S += trace
    #---------------------------------
    S.s_moveouts = T.metadata['s_moveouts']
    S.p_moveouts = T.metadata['p_moveouts']
    S.source_idx = T.metadata['source_idx']
    S.template_ID = T.metadata['template_idx']
    S.latitude = T.metadata['latitude']
    S.longitude = T.metadata['longitude']
    S.depth = T.metadata['depth']
    S.corr = corr
    S.stations = stations.tolist()
    S.components = components.tolist()
    if return_tp:
        return S, T
    return S
def SVDWF_multiplets_bulk(template_id, db_path=autodet.cfg.dbpath, db_path_M='matched_filter_1/', db_path_T='template_db_1/',
                          WAVEFORMS=None, best=False, normRMS=True,
                          n_singular_values=5, max_freq=autodet.cfg.max_freq, attach_raw_data=False):
    """
    Stack the detections of one template after filtering them with ``SVDWF``.

    Every file matching ``db_path + db_path_M + '*multiplets_*meta.h5'`` that
    contains a group named ``str(template_id)`` contributes its detections.
    Each (station, component) channel is passed through ``SVDWF`` (defined
    elsewhere in the project), averaged over detections and scaled by its
    peak absolute amplitude.

    Parameters
    ----------
    template_id : int
        Template whose detections are processed.
    db_path, db_path_M, db_path_T : str, optional
        Database root, matched-filter sub-directory and template
        sub-directory.
    WAVEFORMS : numpy.ndarray, optional
        Pre-loaded waveforms indexed as (detection, station, component,
        sample). When given, nothing is read from the ``wav.h5`` files and,
        if ``normRMS`` is True, the array is normalized IN PLACE.
    best : bool, optional
        If True, keep only detections whose correlation coefficient exceeds
        a threshold that depends on the total number of detections.
    normRMS : bool, optional
        If True, divide every trace by its standard deviation (traces with
        zero standard deviation are left untouched).
    n_singular_values : int, optional
        Forwarded to ``SVDWF``.
    max_freq : float, optional
        Forwarded to ``SVDWF`` as ``max_freq``.
    attach_raw_data : bool, optional
        If True, attach the unfiltered waveforms as ``S.raw_data``.

    Returns
    -------
    obspy.Stream or None
        One trace per (station, component) holding the peak-normalized mean
        of the filtered waveforms, plus the attributes ``stations``,
        ``components``, ``latitude``, ``longitude``, ``depth``, ``data``
        (all filtered waveforms), ``Nmulti`` (number of stacked detections)
        and optionally ``raw_data``. None is returned when no file contains
        the template or when no detection passes the selection.
    """
    from obspy import Stream, Trace

    #-----------------------------------------------------------------------------------------------
    T = autodet.db_h5py.read_template('template{:d}'.format(template_id), db_path=db_path+db_path_T)
    #-----------------------------------------------------------------------------------------------
    # Sorted so that the stacking order does not depend on the file system.
    files_all = sorted(glob.glob(db_path + db_path_M + '*multiplets_*meta.h5'))
    files = []
    S = Stream()
    CC = []
    tid_str = str(template_id)
    t1 = give_time()
    for file in files_all:
        with h5.File(file, mode='r') as f:
            if tid_str in f:
                files.append(file[:-len('meta.h5')])
                CC.extend(f[tid_str]['correlation_coefficients'][()].tolist())
    CC = np.float32(CC)
    t2 = give_time()
    print('{:.2f} s to retrieve the correlation coefficients.'.format(t2-t1))
    if len(files) == 0:
        print("None multiplet for template {:d} !! Return None".format(template_id))
        return None
    with h5.File(files[0] + 'meta.h5', mode='r') as f:
        S.stations = f[tid_str]['stations'][()].astype('U').tolist()
        S.components = f[tid_str]['components'][()].astype('U').tolist()
    ns = len(S.stations)
    nc = len(S.components)
    S.latitude = T.metadata['latitude']
    S.longitude = T.metadata['longitude']
    S.depth = T.metadata['depth']
    #----------------------------------------------
    if WAVEFORMS is None:
        CC = np.sort(CC)
        if len(CC) > 300:
            CC_thres = CC[-101]
        elif len(CC) > 70:
            CC_thres = CC[int(7./10.*len(CC))] # the best 30%
        elif len(CC) > 30:
            CC_thres = np.median(CC) # the best 50%
        elif len(CC) > 10:
            CC_thres = np.percentile(CC, 33.) # the best 66% detections
        else:
            CC_thres = 0.
        chunks = []
        t1 = give_time()
        for file in files:
            if best:
                with h5.File(file + 'meta.h5', mode='r') as fm:
                    selection = np.where(fm[tid_str]['correlation_coefficients'][:] > CC_thres)[0]
                if selection.size == 0:
                    continue
                with h5.File(file + 'wav.h5', mode='r') as fw:
                    waves = fw[tid_str]['waveforms'][selection, :, :, :]
            else:
                with h5.File(file + 'wav.h5', mode='r') as fw:
                    waves = fw[tid_str]['waveforms'][()]
            if normRMS:
                # Divide each trace by its standard deviation; flat traces
                # (zero standard deviation) are left as they are.
                norms = np.std(waves, axis=-1, keepdims=True)
                np.divide(waves, norms, out=waves, where=norms != 0.)
            chunks.append(waves)
        t2 = give_time()
        print('{:.2f} s to retrieve the waveforms.'.format(t2-t1))
        if not chunks:
            # np.vstack would raise on an empty list.
            print("No waveform selected for template {:d} !! Return None".format(template_id))
            return None
        WAVEFORMS = chunks
    elif normRMS:
        # The caller's array is normalized in place.
        norms = np.std(WAVEFORMS, axis=-1, keepdims=True)
        np.divide(WAVEFORMS, norms, out=WAVEFORMS, where=norms != 0.)
    WAVEFORMS = np.vstack(WAVEFORMS)
    WAVEFORMS = WAVEFORMS.reshape(-1, ns, nc, WAVEFORMS.shape[-1])
    print(WAVEFORMS.shape)
    filtered_data = np.zeros_like(WAVEFORMS)
    for s in range(ns):
        for c in range(nc):
            filtered_data[:,s,c,:] = SVDWF(WAVEFORMS[:,s,c,:], n_singular_values, max_freq=max_freq)
            mean = np.mean(filtered_data[:,s,c,:], axis=0)
            peak = np.abs(mean).max()
            if peak != 0.:
                # An all-zero channel would otherwise turn into NaNs.
                mean /= peak
            trace = Trace(data=mean)
            trace.stats.station = S.stations[s]
            trace.stats.channel = S.components[c]
            trace.stats.sampling_rate = autodet.cfg.sampling_rate
            S += trace
    S.data = filtered_data
    if attach_raw_data:
        S.raw_data = WAVEFORMS
    # Defined on every path, including when WAVEFORMS was supplied.
    S.Nmulti = WAVEFORMS.shape[0]
    return S
def spectral_filtering_detections(tid, db_path_T='template_db_1/', db_path_M='matched_filter_1/', db_path=autodet.cfg.dbpath, SNR_thres=5., WAVEFORMS=None, normRMS=True, best=True):
    """
    Stack the detections of template ``tid`` after ``spectral_filtering``.

    The detections are read from the files matching
    ``db_path + db_path_M + '*multiplets<tid>_*'``: files ending in
    ``wav.h5`` hold the waveforms, every other match is treated as a
    metadata file. Each (station, component) channel is passed through
    ``spectral_filtering`` (defined elsewhere in the project) and summed over
    detections.

    Parameters
    ----------
    tid : int
        Template whose detections are processed.
    db_path_T, db_path_M, db_path : str, optional
        Template sub-directory, matched-filter sub-directory and database
        root.
    SNR_thres : float, optional
        Forwarded to ``spectral_filtering`` as ``SNR_thres``.
    WAVEFORMS : numpy.ndarray, optional
        Pre-loaded waveforms indexed as (detection, station, component,
        sample). When given, no waveform is read from disk and, if
        ``normRMS`` is True, the array is normalized IN PLACE.
    normRMS : bool, optional
        If True, divide every trace by its standard deviation (traces with
        zero standard deviation are left untouched).
    best : bool, optional
        If True, keep only detections whose correlation coefficient exceeds
        a threshold that depends on the total number of detections.

    Returns
    -------
    numpy.ndarray or None
        float32 array of shape (n_stations, n_components, n_samples), or
        None when the matching files do not provide both a sample count and
        a non-empty metadata file.
    """
    import glob
    from obspy import Stream
    #-----------------------------------------------------------------------------------------------
    T = autodet.db_h5py.read_template('template{:d}'.format(tid), db_path=db_path+db_path_T)
    #-----------------------------------------------------------------------------------------------
    print("Looking for {}{:d}_*".format(db_path + db_path_M + '*multiplets', tid))
    # glob replaces the former `ls` subprocess: no shell involved, no pipe
    # left open, and paths containing spaces are handled.
    files = sorted(glob.glob(db_path + db_path_M + '*multiplets{:d}_*'.format(tid)))
    Nsamp = 0
    ns = 0
    # NOTE(review): S only collects metadata and is not returned.
    S = Stream()
    #------------- retrieve metadata ---------------
    for path in files:
        if path.endswith('wav.h5'):
            if Nsamp == 0:
                with h5.File(path, mode='r') as fwav0:
                    # Only the shape is needed: do not load the dataset.
                    Nsamp = fwav0['waveforms'].shape[-1]
        else:
            with h5.File(path, mode='r') as fm0:
                if len(fm0['origin_times']) == 0:
                    continue
                stations = fm0['stations'][()].astype('U').tolist()
                components = fm0['components'][()].astype('U').tolist()
            ns = len(stations)
            nc = len(components)
            S.stations = stations
            S.components = components
            S.latitude = T.metadata['latitude']
            S.longitude = T.metadata['longitude']
            S.depth = T.metadata['depth']
            S.template_ID = tid
        if ns != 0 and Nsamp != 0:
            break
    else:
        # Ran out of files before finding both pieces of information.
        print("None multiplet for template {:d} !! Return None".format(tid))
        return None
    #----------------------------------------------
    if WAVEFORMS is None:
        if best:
            CC = np.zeros(0, dtype=np.float32)
            for file in files:
                if not file.endswith('meta.h5'):
                    continue
                with h5.File(file, mode='r') as fm:
                    if len(fm['correlation_coefficients']) == 0:
                        continue
                    CC = np.hstack((CC, fm['correlation_coefficients'][:]))
            CC = np.sort(CC)
            if len(CC) > 300:
                CC_thres = CC[-101]
            elif len(CC) > 70:
                CC_thres = CC[int(7./10.*len(CC))] # the best 30%
            elif len(CC) > 30:
                CC_thres = np.median(CC) # the best 50%
            elif len(CC) > 10:
                CC_thres = np.percentile(CC, 33.) # the best 66% detections
            else:
                CC_thres = 0.
        # The empty first chunk fixes the shape and dtype of the result even
        # when no detection is selected.
        chunks = [np.zeros((0, ns, nc, Nsamp), dtype=np.float32)]
        for file in files:
            if not file.endswith('wav.h5'):
                continue
            with h5.File(file, mode='r') as fw:
                if len(fw['waveforms']) == 0:
                    continue
                if best:
                    with h5.File(file[:-len('wav.h5')]+'meta.h5', mode='r') as fm:
                        selection = np.where(fm['correlation_coefficients'][:] > CC_thres)[0]
                    if selection.size == 0:
                        continue
                    waves = np.asarray(fw['waveforms'][selection,:,:,:], dtype=np.float32)
                else:
                    waves = fw['waveforms'][:,:,:,:]
            if normRMS:
                # Divide each trace by its standard deviation; flat traces
                # (zero standard deviation) are left as they are.
                norms = np.std(waves, axis=-1, keepdims=True)
                np.divide(waves, norms, out=waves, where=norms != 0.)
            chunks.append(waves)
        # Single concatenation instead of re-copying the stack per file.
        WAVEFORMS = np.vstack(chunks)
    elif normRMS:
        # The caller's array is normalized in place.
        norms = np.std(WAVEFORMS, axis=-1, keepdims=True)
        np.divide(WAVEFORMS, norms, out=WAVEFORMS, where=norms != 0.)
    filtered_waveforms = np.zeros((ns, nc, Nsamp), dtype=np.float32)
    for s in range(ns):
        for c in range(nc):
            filtered_waveforms[s, c, :] = np.sum(spectral_filtering(WAVEFORMS[:, s, c, :], SNR_thres=SNR_thres), axis=0)
    return filtered_waveforms
def SVDWF_multiplets_test(template_id, db_path=autodet.cfg.dbpath, db_path_M='matched_filter_2/', db_path_T='template_db_2/', WAVEFORMS=None, normRMS=True, Nsing_values=5, max_freq=autodet.cfg.max_freq, attach_raw_data=False):
    """
    Stack the best detections of one template after filtering with ``SVDWF``.

    The detections are ranked by correlation coefficient using the catalog
    returned by ``autodet.db_h5py.read_catalog_multiplets``; a subset that
    depends on the catalog size is kept, re-sorted by origin time, read from
    the ``wav.h5`` files and normalized. Each (station, component) channel is
    then passed through ``SVDWF`` (defined elsewhere in the project),
    averaged over detections and scaled by its peak absolute amplitude.

    Parameters
    ----------
    template_id : int
        Template whose detections are processed.
    db_path, db_path_M, db_path_T : str, optional
        Database root, matched-filter sub-directory and template
        sub-directory.
    WAVEFORMS : optional
        NOTE(review): accepted for signature compatibility but ignored; the
        waveforms are always read from disk.
    normRMS : bool, optional
        If True, divide every trace by its standard deviation, otherwise by
        its peak absolute amplitude. Traces whose norm is zero are left
        untouched.
    Nsing_values : int, optional
        Forwarded to ``SVDWF``.
    max_freq : float, optional
        Forwarded to ``SVDWF`` as ``max_freq``.
    attach_raw_data : bool, optional
        If True, attach the unfiltered waveforms as ``S.raw_data``.

    Returns
    -------
    obspy.Stream or None
        One trace per (station, component) holding the peak-normalized mean
        of the filtered waveforms, plus the attributes ``stations``,
        ``components``, ``latitude``, ``longitude``, ``depth``,
        ``template_id``, ``data`` (all filtered waveforms), ``Nmulti`` and
        optionally ``raw_data``. None is returned when no usable file or no
        detection is found.
    """
    import glob
    from obspy import Stream, Trace
    #-----------------------------------------------------------------------------------------------
    T = autodet.db_h5py.read_template('template{:d}'.format(template_id), db_path=db_path+db_path_T)
    #-----------------------------------------------------------------------------------------------
    print("Looking for {}{:d}_*".format(db_path + db_path_M + '*multiplets', template_id))
    # glob replaces the former `ls` subprocess: no shell involved, no pipe
    # left open, and paths containing spaces are handled.
    files = sorted(glob.glob(db_path + db_path_M + '*multiplets{:d}_*'.format(template_id)))
    Nsamp = 0
    ns = 0
    S = Stream()
    #------------- retrieve metadata ---------------
    for path in files:
        if path.endswith('wav.h5'):
            if Nsamp == 0:
                with h5.File(path, mode='r') as fwav0:
                    # Only the shape is needed: do not load the dataset.
                    Nsamp = fwav0['waveforms'].shape[-1]
        else:
            with h5.File(path, mode='r') as fm0:
                if len(fm0['origin_times']) == 0:
                    continue
                stations = fm0['stations'][()].astype('U').tolist()
                components = fm0['components'][()].astype('U').tolist()
            ns = len(stations)
            nc = len(components)
            S.stations = stations
            S.components = components
            S.latitude = T.metadata['latitude']
            S.longitude = T.metadata['longitude']
            S.depth = T.metadata['depth']
            S.template_id = template_id
        if ns != 0 and Nsamp != 0:
            break
    else:
        # Ran out of files before finding both pieces of information.
        print("None multiplet for template {:d} !! Return None".format(template_id))
        return None
    #----------------------------------------------
    catalog = autodet.db_h5py.read_catalog_multiplets('multiplets{:d}'.format(template_id), db_path_M=db_path_M, db_path=db_path)
    CC = catalog['correlation_coefficients']
    best_detection_indexes = np.argsort(CC)[::-1]
    if CC.size > 300:
        best_detection_indexes = best_detection_indexes[:100]                     # the best 100 detections
    elif CC.size > 100:
        best_detection_indexes = best_detection_indexes[:int(30./100. * CC.size)] # the best 30%
    elif CC.size > 50:
        best_detection_indexes = best_detection_indexes[:int(50./100. * CC.size)] # the best 50%
    elif CC.size > 10:
        best_detection_indexes = best_detection_indexes[:int(66./100. * CC.size)] # the best 66%
    # otherwise keep all detections
    n_events = best_detection_indexes.size
    if n_events == 0:
        print("None multiplet for template {:d} !! Return None".format(template_id))
        return None
    # reorder by chronological order
    best_detection_indexes = best_detection_indexes[np.argsort(catalog['origin_times'][best_detection_indexes])]
    # get the waveforms
    WAVEFORMS = np.zeros((n_events, ns, nc, Nsamp), dtype=np.float32)
    open_name = None
    f = None
    try:
        for n, det in enumerate(best_detection_indexes):
            filename = db_path + db_path_M + catalog['filenames'][det].decode('utf-8')
            # Compare with the file that is currently open (not with the
            # first one), so a detection is never read from the wrong file.
            if filename != open_name:
                if f is not None:
                    f.close()
                f = h5.File(filename + 'wav.h5', mode='r')
                open_name = filename
            WAVEFORMS[n, :, :, :] = f['waveforms'][catalog['indices'][det], :, :, :]
    finally:
        if f is not None:
            f.close()
    # normalization
    if normRMS:
        norms = np.std(WAVEFORMS, axis=-1, keepdims=True)
    else:
        norms = np.abs(WAVEFORMS).max(axis=-1, keepdims=True)
    np.divide(WAVEFORMS, norms, out=WAVEFORMS, where=norms != 0.)
    filtered_data = np.zeros((n_events, ns, nc, Nsamp), dtype=np.float32)
    for s in range(ns):
        for c in range(nc):
            filtered_data[:,s,c,:] = SVDWF(WAVEFORMS[:,s,c,:], Nsing_values, max_freq=max_freq)
            mean = np.mean(filtered_data[:,s,c,:], axis=0)
            peak = np.abs(mean).max()
            if peak != 0.:
                # An all-zero channel would otherwise turn into NaNs.
                mean /= peak
            trace = Trace(data=mean)
            trace.stats.station = S.stations[s]
            trace.stats.channel = S.components[c]
            trace.stats.sampling_rate = autodet.cfg.sampling_rate
            S += trace
    S.data = filtered_data
    if attach_raw_data:
        S.raw_data = WAVEFORMS
    S.Nmulti = n_events
    return S