def main(obs_path, ref_path, pkl_path):
    """Compute per-station property times and pickle them.

    The observed dataset is opened in append mode and the reference dataset
    read-only. The first event of the observed file supplies the source
    location; ``get_property_times`` is then evaluated for every station
    group handed to the kernel by
    ``process_two_files_without_parallel_output``.

    Args:
        obs_path: path of the observed ASDF file.
        ref_path: path of the reference ASDF file.
        pkl_path: path of the pickle file that receives the results; it is
            written only when ``isroot`` is truthy.
    """
    results = None
    with pyasdf.ASDFDataSet(obs_path, mode="a") as obs_ds:
        with pyasdf.ASDFDataSet(ref_path, mode="r") as ref_ds:
            event = obs_ds.events[0]
            origin = event.preferred_origin() or event.origins[0]
            evla = origin.latitude
            evlo = origin.longitude
            # presumably metres -> kilometres; confirm against
            # get_property_times
            evdp = origin.depth / 1000

            # kernel function
            def process(sg_obs, sg_ref):
                # Only the station coordinates are needed. The waveform
                # stream and the station-contents set that used to be read
                # here were never used and have been dropped.
                inv_obs = sg_obs["StationXML"]
                stla = inv_obs[0][0].latitude
                stlo = inv_obs[0][0].longitude
                return get_property_times(stla, stlo, evla, evlo, evdp)

            results = obs_ds.process_two_files_without_parallel_output(
                ref_ds, process)

    if isroot:
        with open(pkl_path, 'wb') as handle:
            pickle.dump(results, handle)
def plot_SNR_distance(sdir,ccomp,lag):
    '''
    show the statistic of how SNR varies with distance for the stacked
    cross correlations

    sdir:  directory searched for *.h5 files
    ccomp: component key read below the 'Allstacked' group
    lag:   selects the SNR parameter: 0 -> 'ssnr', -1 -> 'nsnr',
           anything else -> 'psnr'

    raises IndexError when sdir contains no *.h5 file
    '''
    #------all station pairs-------
    sfiles = glob.glob(os.path.join(sdir,'*.h5'))
    if not sfiles:
        # same exception type as the former sfiles[0] failure, with a message
        raise IndexError('no .h5 files found in %s' % sdir)

    tags = 'Allstacked'
    if lag==0:
        snr_para = 'ssnr'
    elif lag==-1:
        snr_para = 'nsnr'
    else:
        snr_para = 'psnr'

    # (subplot position, index into the SNR array) for the 3x3 panel grid
    panels = ((331,1),(332,2),(333,4),(334,6),(335,7),
              (336,8),(337,9),(338,10),(339,11))

    #-----read in freq--------
    # context manager so the handle is released even if the lookup fails
    with pyasdf.ASDFDataSet(sfiles[0],mode='r') as ds:
        freq = ds.auxiliary_data[tags][ccomp].parameters['freq']
        print(freq)

    #---loop through each station-pair----
    for sfile in sfiles:
        with pyasdf.ASDFDataSet(sfile,mode='r') as ds:
            dist = ds.auxiliary_data[tags][ccomp].parameters['dist']
            snr  = ds.auxiliary_data[tags][ccomp].parameters[snr_para]

            for position,isnr in panels:
                plt.subplot(position)
                plt.scatter(dist,snr[isnr],marker='o',s=25,c=0)

    # y-labels on the left column, x-labels on the bottom row
    for position in (331,334,337):
        plt.subplot(position)
        plt.ylabel('SNR')
    for position in (337,338,339):
        plt.subplot(position)
        plt.xlabel('distance [km]')

    # the title shows the distance of the last file that was read
    plt.subplot(332)
    plt.title([sdir.split('/')[-1],dist,'km'])
    plt.show()
def select_windows(parameters, paths, merge_flag, obs_tag, syn_tag):
    """ Selects windows by comparing observed and synthetic traces; writes
      results to a json file

      :param parameters: mapping of channel -> keyword arguments; each value
          is expanded into pyflex.Config
      :param paths.obs: ASDF observed data filename (relative to this module)
      :param paths.syn: ASDF synthetic data filename (relative to this module)
      :param paths.output: windows will be written to a JSON file with
          this name
      :param paths.log: listed in the original documentation; not read by
          the code below
      :param merge_flag: when truthy, the windows are passed through merge()
          before being written
      :param obs_tag: observed data are read using this ASDF tag
      :param syn_tag: synthetic data are read using this ASDF tag
    """
    from mpi4py import MPI

    rank = MPI.COMM_WORLD.rank
    base_dir = dirname(__file__)

    # observed data (its first event is used for every station)
    observed = pyasdf.ASDFDataSet(join(base_dir, paths.obs),
                                  compression=None, mode="a")
    event = observed.events[0]

    # synthetics
    synthetic = pyasdf.ASDFDataSet(join(base_dir, paths.syn),
                                   compression=None, mode="a")

    # one pyflex.Config per channel
    config = {channel: pyflex.Config(**param)
              for channel, param in parameters.items()}

    # ASDF processing calls the kernel with one station group per dataset
    def wrapped_function(obs_group, syn_group):
        return pytomo3d.window.window_on_stream(
            obs_group[obs_tag], syn_group[syn_tag], config,
            station=obs_group.StationXML, event=event,
            user_modules=None, figure_mode=False, figure_dir=None,
            _verbose=False)

    windows = observed.process_two_files_without_parallel_output(
        synthetic, wrapped_function)

    # only rank 0 writes the result
    if rank != 0:
        return
    if merge_flag:
        windows = merge(windows)
    write_windows_json(paths.output, windows)
def combine_adjoint_sources(paths, tag, rotate=True, auxiliary_data=False):
    """ Sums adjoint sources from different ASDF files in a linear combination
      with user-supplied weights. Can be useful, for example, if previous
      data processing, window selection, and misfit measurements steps were
      carried out separately for different pass bands (e.g. long period,
      short period) or different phases (e.g. body waves, surface waves).

      param paths.input: ASDF files containing adjoint sources
      type paths.input: list
      param paths.weights: corresponding list of JSON files containing weights
      type paths.weights: list
      param paths.output: summed adjoint sources are written to this ASDF file
      type paths.output: str
      param tag: waveform tag read from every station of every file

      ``rotate`` and ``auxiliary_data`` are accepted but not used by the
      code below.
    """
    base_dir = dirname(__file__)

    datasets = []
    weight_tables = []
    for filenames in zip_catch(paths.input, paths.weights):
        # adjoint sources
        datasets.append(
            pyasdf.ASDFDataSet(join(base_dir, filenames[0]),
                               mpi=False, mode='r'))
        # user-supplied weights
        weight_tables.append(read_json(join(base_dir, filenames[1])))

    # the output file starts as a copy of the first input file
    shutil.copy(paths.input[0], paths.output)
    summed = pyasdf.ASDFDataSet(paths.output, mpi=False, mode="a")

    # overwrite output file data with zeros
    for station_group in summed.waveforms:
        for trace in station_group[tag]:
            trace.data[:] = 0.

    # weighted sum of adjoint sources
    # NOTE(review): the traces are modified in memory only; nothing in this
    # function writes them back to paths.output - confirm whether a write
    # step is still missing (see the trailing TODO).
    for weights, dataset in zip(weight_tables, datasets):
        for station in weights:
            trace_pairs = zip(summed.waveforms[station][tag],
                              dataset.waveforms[station][tag])
            for total, contribution in trace_pairs:
                total.data += weights[station][total.id] * contribution.data

    # TODO: write misfit
def __init__(self, filename, compression=None):
    """Open the ASDF file at *filename*, creating it if it does not exist.

    Args:
        filename (str):
            Path to ASDF file to open or create.
        compression (str):
            Any value supported by pyasdf.asdf_data_set.ASDFDataSet.
            Only passed on when the file does not exist yet.
    """
    dataset_kwargs = {}
    if not os.path.exists(filename):
        # an existing file is opened with pyasdf's default settings
        dataset_kwargs["compression"] = compression
    self.dataset = pyasdf.ASDFDataSet(filename, **dataset_kwargs)
    self.FORMAT_VERSION = FORMAT_VERSION
def plot_cc_withtime_stack(ccffile,freqmin,freqmax,ccomp,maxlag=None):
    '''
    plot the filtered cross-correlation functions between station-pair sta1-sta2
    for all of the available days stored in ccffile

    ccffile: path of the ASDF file; nothing happens when it is not a file
    freqmin, freqmax: corner frequencies handed to bandpass
    ccomp:   component key read for every entry
    maxlag:  half-length of the plotted time axis; a falsy value means 100

    raises ValueError when the 'Allstacked' group is empty

    example: plot_cc_withtime('/Users/chengxin/Documents/Harvard/Kanto_basin/Mesonet_BW/STACK/E.AYHM/E.AYHM_E.ENZM.h5',1,5,'ZZ',50)
    '''
    #---basic parameters----
    if not maxlag:
        maxlag = 100

    #-----check whether file exists------
    if not os.path.isfile(ccffile):
        return

    with pyasdf.ASDFDataSet(ccffile,mode='r') as ds:
        rlist = ds.auxiliary_data['Allstacked'].list()
        if not rlist:
            raise ValueError('no data stacked for %s'%ccffile)

        dt   = ds.auxiliary_data['Allstacked'][rlist[0]].parameters['dt']
        tt   = np.arange(-maxlag/dt, maxlag/dt+1)*dt
        dist = ds.auxiliary_data['Allstacked'][rlist[0]].parameters['dist']

        #-----loop through each day-----
        # the last entry of the list is not plotted
        slist = ds.auxiliary_data.list()
        # ii is used after the loop; give it a value for the case where the
        # loop body never runs (this used to end in a NameError)
        ii = 0
        for ii in range(len(slist)-1):
            # a new figure every 60 traces
            if ii%60==0:
                plt.figure(figsize=(9,6))

            iday = slist[ii]

            #-----in case there is no data on that day-----
            try:
                data = ds.auxiliary_data[iday][ccomp].data[:]
                data = bandpass(data,freqmin,freqmax,int(1/dt),corners=4, zerophase=True)

                #--normalize the data----
                data = data/max(data)
            except Exception:
                data = np.zeros(tt.shape,dtype=np.float32)

            #----make index----
            npts  = len(data)
            indx0 = npts//2
            tindx = int(maxlag/dt)

            # the first trace is drawn in blue, all others in black
            color = 'b-' if ii==0 else 'k-'
            plt.plot(tt,data[indx0-tindx:indx0+tindx+1]+ii*2,color,linewidth=0.5)
            plt.text(maxlag*0.7,ii*2,iday,fontsize=6)

        plt.grid(True)

        #---------highlight zero time------------
        plt.plot([0,0],[0,ii*2],'r--',linewidth=1.5)
        plt.title('%s %s, dist:%6.1fkm @%4.1f-%4.1f Hz' % (ccffile.split('/')[-1],ccomp,dist,freqmin,freqmax))
        plt.xlabel('time [s]')
        plt.ylabel('days')
        plt.show()
def load_corr(corr_h5,comp):
    """
    Load correlations into numpy array. Prepares for input into MWCS.

    :type corr_h5: str
    :param corr_h5: path/filename (/Volumes/.../../~.h5) of the ASDF data set
                    holding the cross-correlations. Must end in .h5
    :param comp: Components used in cross-correlation, e.g. 'ZZ', 'RT', 'TT'
    :return: tuple (all_data, all_param, days, net_sta): the stacked data
             array, the list of parameter objects, the last ten characters
             of every entry name with '_' replaced by '/', and the file
             name without '.h5'
    """
    # the group name is the file name without '.h5'
    net_sta = os.path.basename(corr_h5).replace('.h5','')

    all_param = []
    all_data = []
    with pyasdf.ASDFDataSet(corr_h5,mpi=False) as ds:
        group = ds.auxiliary_data.CrossCorrelation[net_sta][comp]
        corrs = group.list()
        for corr in corrs:
            entry = group[corr]
            all_data.append(np.array(entry.data))
            all_param.append(entry.parameters)

    all_data = np.vstack(np.array(all_data))
    days = []
    for name in corrs:
        days.append(name[-10:].replace('_','/'))
    return all_data, all_param, days, net_sta
def ASDFmodel(self, infname, per=10., phgr=1, verbose=True, Dx=0., Dz=0.):
    """
    Read ASDF model
    =============================================================================
    Input Parameters:
    infname   - input file name
    per       - period
    phgr      - use phase(1) or group(2) velocity
    verbose   - print the Disp auxiliary-data group when truthy
    Dx, Dz    - offsets added to the x / y parameter of every anomaly

    Raises ValueError when per is not among the periods of entry VP000.
    =============================================================================
    """
    dbase = pyasdf.ASDFDataSet(infname)
    disp = dbase.auxiliary_data.Disp
    if verbose:
        # function-call form works on both Python 2 and Python 3
        print(disp)
    # Slice the dataset directly: Dataset.value no longer exists in h5py 3.
    # Row 0 is read as the periods, row phgr as the velocities.
    perArr = disp.VP000.data[0, :]
    vArr = disp.VP000.data[phgr, :]
    # NOTE(review): exact float comparison; per must match a stored value
    # bit for bit.
    if not np.any(perArr == per):
        raise ValueError(
            'Period %s sec not in the theoretical dispersion curve !' % per)
    Vs0 = vArr[perArr == per] * 1000.
    self.setbackground(vs=Vs0)
    if self.plotflag:
        self.VsArrPlot[:] = Vs0
    # the first listed entry is skipped (presumably VP000, used above for
    # the background - confirm)
    for auxid in disp.list()[1:]:
        entry = disp[auxid]
        perArr = entry.data[0, :]
        vArr = entry.data[phgr, :]
        Rmax = entry.parameters['Rmax']
        Rmin = entry.parameters['Rmin']
        x = entry.parameters['x']
        y = entry.parameters['y']
        Vs = vArr[perArr == per] * 1000.
        self.RingHomoAnomaly(Xc=x + Dx, Zc=y + Dz, Rmax=Rmax, Rmin=Rmin,
                             va=Vs)
    return
def correlate():
    """Run this rank's share of the correlation blocks.

    Creates data/correlations on rank 0, writes a per-rank report file,
    runs every ``size``-th block starting at ``rank`` and, for ASDF output,
    lets rank 0 append the StationXML of every station found in the
    CrossCorrelation group.
    """
    # Create output directory, if necessary
    outdir = os.path.join('data', 'correlations')
    if rank == 0 and not os.path.exists(outdir):
        os.mkdir(outdir)
    comm.Barrier()

    # correlation report file: appended to when it already exists
    output_file = os.path.join(outdir,
                               'correlation_report_rank%g.txt' % rank)
    if os.path.exists(output_file):
        ofid = open(output_file, 'a')
        header = 'Resuming correlation job, Date:'
    else:
        ofid = open(output_file, 'w')
        header = 'Correlation job, Date:'

    # The report handle used to stay open for the life of the process;
    # close it once all blocks of this rank are done, also on error.
    try:
        print(header, file=ofid)
        print(time.strftime('%Y.%m.%dT%H:%M'), file=ofid)

        # - get list of files available;
        # - get blocks of channel pairs
        inventory = correlation_inventory(cfg)

        # - LOOP over blocks:
        for b in inventory.blocks[rank::size]:
            # each block is worked on as a deep copy
            corr_block = CorrBlock(deepcopy(b), cfg)
            corr_block.run(output_file=ofid)
    finally:
        ofid.close()

    # - append all the stationxmls to the asdf file, if asdf output is chosen
    if cfg.format_output.upper() == "ASDF" and rank == 0:
        filename = os.path.join('data', 'correlations', 'correlations.h5')
        with pyasdf.ASDFDataSet(filename) as ds:
            stations = set()
            for cha in ds.auxiliary_data.CrossCorrelation.list():
                parts = cha.split('_')
                stations.add(parts[0] + '.' + parts[1])

            for sta in stations:
                ds.add_stationxml(
                    os.path.join('meta', 'stationxml', '%s.xml' % sta))
def read_asdf_kernel(self):
    """Load events, inventories and streams from ``self.asdf_path``.

    Does nothing when the path is None or not an existing file. Otherwise
    fills ``self.events``, ``self.waveforms_list`` and, per station,
    ``self.waveforms[name] = {"inv": ..., "st": ...}`` using the first
    waveform tag of that station. ``self.tag`` ends up holding the tag of
    the last station processed. All trace data are converted to float32.
    """
    if self.asdf_path is None or not isfile(self.asdf_path):
        return

    # since virasdf will not used parallel io, we disable mpi here
    with pyasdf.ASDFDataSet(self.asdf_path, mode="r", mpi=False) as ds:
        self.events = ds.events
        self.waveforms_list = ds.waveforms.list()
        for each_net_sta in self.waveforms_list:
            wg = ds.waveforms[each_net_sta]
            self.tag = wg.get_waveform_tags()[0]
            inv = wg["StationXML"]
            st = wg[self.tag]
            if self.usecopy:
                inv = inv.copy()
                st = st.copy()
            self.waveforms[each_net_sta] = {"inv": inv, "st": st}
            # it's better to use float32
            for tr in self.waveforms[each_net_sta]["st"]:
                tr.data = np.require(tr.data, dtype="float32")
def custom_get_waveforms(network, station, location, channel, starttime,
                         endtime, quality=None, minimumlength=None,
                         longestonly=None, filename=None,
                         attach_response=False, **kwargs):
    """Return the raw recordings of one station that span a time window.

    Reads from the fixed 7D(2012-2013) ASDF file. A waveform is selected
    when its name contains 'raw_recording', the time in the second
    '__'-separated field of the name is before ``starttime`` and the time
    in the third field is after ``endtime``. Only ``network``, ``station``,
    ``starttime`` and ``endtime`` are used; the other arguments are
    accepted and ignored.
    """
    station_key = network + '.' + station
    with pyasdf.ASDFDataSet(
            '/g/data/ha3/Passive/_ANU/7D(2012-2013)/ASDF/7D(2012-2013).h5',
            mode='r') as asdfDataSet:
        st = Stream()
        # ignoring channel for now as all the 7D network waveforms have
        # only BH? channels
        selected = []
        for name in asdfDataSet.waveforms[station_key].list():
            if 'raw_recording' not in name:
                continue
            if not UTC(name.split("__")[1]) < starttime:
                continue
            if not UTC(name.split("__")[2]) > endtime:
                continue
            selected.append(name)

        for name in selected:
            st += asdfDataSet.waveforms[station_key][name]
        return st
def plot_spectrum2(sfile,iday,icomp,freqmin,freqmax):
    '''
    this script plots the noise spectrum for the idayth on icomp (results from step1)
    and compare it with the waveforms in time-domain

    sfile: path of the ASDF file; the station name is the second
           '.'-separated field of its base name
    iday:  entry name looked up below icomp
    icomp: auxiliary-data group to read
    freqmin, freqmax: corner frequencies handed to bandpass

    nothing is plotted when icomp or iday is missing or the data are not 2-D
    '''
    dt = 0.05
    sta = sfile.split('/')[-1].split('.')[1]

    # read inside a context manager so the file is closed on every path
    # (the handle was never released before)
    with pyasdf.ASDFDataSet(sfile,mode='r') as ds:
        #--check whether it exists----
        if icomp not in ds.auxiliary_data.list():
            return
        if iday not in ds.auxiliary_data[icomp].list():
            return
        spect = ds.auxiliary_data[icomp][iday].data[:]

    #---look at hourly----
    if spect.ndim!=2:
        return

    nfft = spect.shape[1]
    plt.title('station %s %s @ %s' % (sta,icomp,iday))
    for ii in range(spect.shape[0]):
        waveform = np.real(np.fft.irfft(spect[ii])[0:nfft])
        waveform = bandpass(waveform,freqmin,freqmax,int(1/dt),corners=4, zerophase=True)
        # scale to a peak of 0.5 and offset each row by its index
        waveform = waveform*0.5/max(waveform)
        plt.plot(np.arange(0,nfft)*dt,waveform+ii,'k-')
    plt.show()
def retrieve_events_information(dir_observe):
    """Read the first event of an ASDF file and build two moment tensors.

    Args:
        dir_observe: path of the ASDF file.

    Returns:
        Tuple ``(parameters1, m_init_fullMT, m_init_DC)``: the first nodal
        plane of the first focal mechanism, the catalogue moment tensor
        divided by its scalar moment, and the tensor returned by
        ``functions.DC_MT`` for that nodal plane. Both arrays are ordered
        (rr, tt, pp, rt, rp, tp).
    """
    # The event catalogue is all that is needed; close the file right away.
    # Unused origin/station lookups (which shadowed `list` and `datetime`)
    # were removed.
    with pyasdf.ASDFDataSet(dir_observe) as ds:
        event = ds.events[0]

    focal_mechanism = event.focal_mechanisms[0]
    moment_tensor = focal_mechanism['moment_tensor']
    m0 = moment_tensor['scalar_moment']
    tensor = moment_tensor['tensor']
    m_init_fullMT = np.array([
        tensor['m_rr'] / m0,
        tensor['m_tt'] / m0,
        tensor['m_pp'] / m0,
        tensor['m_rt'] / m0,
        tensor['m_rp'] / m0,
        tensor['m_tp'] / m0,
    ])

    parameters1 = focal_mechanism['nodal_planes']['nodal_plane_1']
    # DC_MT's result is unpacked as (rr, tt, pp, rp, rt, tp); the array
    # below reorders it to match m_init_fullMT
    m_rr_init, m_tt_init, m_pp_init, m_rp_init, m_rt_init, m_tp_init = \
        functions.DC_MT(parameters1.strike, parameters1.dip,
                        parameters1.rake)
    m_init_DC = np.array(
        [m_rr_init, m_tt_init, m_pp_init, m_rt_init, m_rp_init, m_tp_init])
    return parameters1, m_init_fullMT, m_init_DC
def process_single_event(min_periods, max_periods, taper_tmin_tmax,
                         asdf_filename, waveform_length, sampling_rate,
                         output_directory):
    """Preprocess one ASDF file once per period band.

    For every ``(min_period, max_period)`` pair the dataset is run through
    ``process_sync_single_trace`` and written to
    ``<output_directory>/<name>.preprocessed_<min>s_to_<max>s.h5``, where
    ``<name>`` is the input base name up to its first dot. The tag
    "synthetic" is mapped to the band-specific tag.
    """
    with pyasdf.ASDFDataSet(asdf_filename, mode="r") as ds:
        # event time: preferred origin, falling back to the first origin
        event = ds.events[0]
        origin = event.preferred_origin() or event.origins[0]
        event_time = origin.time

        output_name_head = asdf_filename.split("/")[-1].split(".")[0]

        for min_period, max_period in zip(min_periods, max_periods):
            # inv will be None if no inv provided
            def process_function(st, inv):  # pylint: disable=unused-argument
                # thin wrapper around process_sync_single_trace
                return process_sync_single_trace(
                    st, event_time, waveform_length, taper_tmin_tmax,
                    min_period, max_period,  # pylint: disable=cell-var-from-loop
                    sampling_rate)

            tag_name = "preprocessed_%is_to_%is" % (
                int(min_period), int(max_period))
            output_path = join(output_directory,
                               output_name_head + "." + tag_name + ".h5")
            ds.process(process_function, output_path,
                       tag_map={"synthetic": tag_name})
def generateStationTestData(sta):
    """Download station metadata and waveforms and store them as ASDF.

    Queries the "IRIS" client for station ``sta`` of ``NETWORK`` /
    ``CHANNEL`` over ``TIME_RANGE`` and writes both the inventory and the
    traces (tagged ``TAG``) to ``test_data_<sta>.h5``.
    """
    time_range = (UTCDateTime(TIME_RANGE[0]), UTCDateTime(TIME_RANGE[1]))

    client = Client("IRIS")
    inv = client.get_stations(network=NETWORK, station=sta, channel=CHANNEL,
                              starttime=time_range[0], endtime=time_range[1],
                              level='channel')
    print(inv)

    traces = client.get_waveforms(network=NETWORK, station=sta,
                                  channel=CHANNEL, location='*',
                                  starttime=time_range[0],
                                  endtime=time_range[1])
    print(traces)

    outfile = 'test_data_' + sta + '.h5'
    # close the file before reporting success so the data are on disk
    with pyasdf.ASDFDataSet(outfile, mode='w') as asdf_out:
        asdf_out.add_stationxml(inv)
        asdf_out.add_waveforms(traces, TAG)

    print("Saved data to " + outfile)
def add_to_adsf_file(filename, folder, tag, verbose=False):
    """Add every entry of *folder* to the ASDF file *filename* under *tag*.

    The directory listing is joined with *folder* and handed, together with
    the opened dataset, to ``_add_to_adsf_file``.
    """
    candidates = []
    for entry in os.listdir(folder):
        candidates.append(os.path.join(folder, entry))

    if verbose:
        print("Found %i potential files to add." % len(candidates))
        print("Opening '%s' ..." % filename)

    with pyasdf.ASDFDataSet(filename) as dataset:
        _add_to_adsf_file(f=dataset, files=candidates, tag=tag,
                          verbose=verbose)
def __init__(self, asdf_source, logger=None, single_item_read_limit_in_mb=1024):
    """
    :param asdf_source: path to a text file containing a list of ASDF files:
           Entries can be commented out with '#'
    :param logger: logger instance
    :param single_item_read_limit_in_mb: value assigned to the attribute of
           the same name on every opened dataset
    :raises NameError: if asdf_source is not a string or a listed file does
           not exist
    """
    self.comm = MPI.COMM_WORLD
    self.nproc = self.comm.Get_size()
    self.rank = self.comm.Get_rank()
    self.logger = logger
    self.asdf_source = None
    self.asdf_file_names = []
    self.asdf_station_coordinates = []

    if not isinstance(asdf_source, str):
        raise NameError('Invalid value for asdf_source..')
    # end if

    self.asdf_source = asdf_source

    # Read the list file once and close it; the same text feeds both the
    # hash (used for the database file name below) and the file-name list.
    with open(self.asdf_source) as fh:
        source_text = fh.read()
    # end with
    self.source_sha1 = hashlib.sha1(source_text.encode('utf-8')).hexdigest()

    # collate file names
    for line in source_text.splitlines():
        fn = line.strip(' \t\n\r\n')
        # Skip blank lines and comments. Stripping first means that
        # whitespace-only lines and indented '#' lines are ignored instead
        # of being treated as file names.
        if not fn or fn.startswith('#'):
            continue
        self.asdf_file_names.append(fn)
    # end for

    self.asdf_datasets = []
    for fn in self.asdf_file_names:
        if self.logger:
            self.logger.info('Opening ASDF file %s..' % (fn))

        if not os.path.exists(fn):
            raise NameError('File not found: %s..' % fn)
        # end if

        ds = pyasdf.ASDFDataSet(fn, mode='r')
        ds.single_item_read_limit_in_mb = single_item_read_limit_in_mb
        self.asdf_datasets.append(ds)
        # one coordinate table per dataset, kept in the same order
        self.asdf_station_coordinates.append(defaultdict(list))
    # end for

    # Create database; the file sits next to the list file and is named
    # after the hash of the list file's contents
    self.conn = None
    self.db_fn = os.path.join(os.path.dirname(self.asdf_source),
                              self.source_sha1 + '.db')
    self.create_database()

    atexit.register(self.cleanup)  # needed for closing asdf files at exit
def d_syb_build(dir_direct, list_input, windows):
    """Concatenate windowed Z/E/N displacement samples of several stations.

    Args:
        dir_direct: path of the ASDF file to read.
        list_input: station ids; the i-th id is paired with ``windows[i]``.
        windows: per-station ``(start, stop)`` sample indices; samples
            ``start:stop`` of each component are copied.

    Returns:
        Tuple ``(d_Z, d_E, d_N, index_left, index_right)``. The three data
        arrays have shape ``(1, total)``; ``index_left[i]:index_right[i]``
        is the slice of row 0 that holds station i.
    """
    ds_obs = pyasdf.ASDFDataSet(dir_direct)
    size = len(list_input)

    # The arrays are allocated with (stop - start + 1) slots per window,
    # one more than is copied below; that size is kept for callers, so the
    # trailing slots stay zero.
    length = np.sum([float(windows[i][1] - windows[i][0] + 1)
                     for i in range(size)])
    print(length)

    d_Z = np.zeros((1, int(length)), dtype=float)
    d_E = np.zeros((1, int(length)), dtype=float)
    d_N = np.zeros((1, int(length)), dtype=float)

    N_index_left = 0
    N_index_right = 0
    lefts = []
    rights = []
    for i, stid in enumerate(list_input):
        # Use this station's own window. The loop previously reused the
        # stale index left over from the sizing loop, so every station was
        # cut with the last window.
        start, stop = windows[i][0], windows[i][1]

        st_obs_A = ds_obs.waveforms[stid].displacement
        st_obs_ROTA_A_Z = st_obs_A.select(component="Z")[0].data
        st_obs_ROTA_A_E = st_obs_A.select(component="E")[0].data
        st_obs_ROTA_A_N = st_obs_A.select(component="N")[0].data

        N_index_right += stop - start
        lefts.append(N_index_left)
        rights.append(N_index_right)

        d_Z[0][N_index_left:N_index_right] = st_obs_ROTA_A_Z[start:stop]
        d_E[0][N_index_left:N_index_right] = st_obs_ROTA_A_E[start:stop]
        d_N[0][N_index_left:N_index_right] = st_obs_ROTA_A_N[start:stop]

        N_index_left += stop - start

    print(N_index_left)
    index_left = np.array(lefts, dtype=int)
    index_right = np.array(rights, dtype=int)
    return d_Z, d_E, d_N, index_left, index_right
def extract_waveform_stations(asdf, stations=None):
    """
    Extract station information from waveform group

    :param asdf: file name or an already opened pyasdf.ASDFDataSet
    :param stations: station ids to look at; all stations of the dataset
        when None
    :return: dict mapping station id to
        [latitude, longitude, elevation, depth of the first channel];
        stations without StationXML are left out
    :raises TypeError: if asdf is neither a string nor a dataset
    """
    if isinstance(asdf, pyasdf.ASDFDataSet):
        ds = asdf
    elif isinstance(asdf, str):
        ds = pyasdf.ASDFDataSet(asdf, mode='r')
    else:
        raise TypeError("Input asdf either be a filename or "
                        "pyasdf.ASDFDataSet")

    station_ids = ds.waveforms.list() if stations is None else stations

    sta_dict = {}
    for st_id in station_ids:
        group = getattr(ds.waveforms, st_id)
        if "StationXML" not in dir(group):
            continue
        # first station of the first network in the inventory
        station = getattr(group, "StationXML")[0][0]
        sta_dict[st_id] = [
            station.latitude,
            station.longitude,
            station.elevation,
            station[0].depth,
        ]

    return sta_dict
def asdf_create(asdf_name, wav_dirs, sta_dir):
    """
    Wrapper on ASDFDataSet to create a new HDF5 file which includes
    all waveforms in a directory and stationXML directory

    :param asdf_name: Full path to new asdf file
    :param wav_dirs: List of directories of waveform files (will grab all
        files, recursively)
    :param sta_dir: Directory of stationXML files (will grab all files)
    :return:
    """
    with pyasdf.ASDFDataSet(asdf_name) as ds:
        # every file below every waveform directory
        wav_files = []
        for wav_dir in wav_dirs:
            for root, dirs, files in os.walk(wav_dir):
                for a_file in files:
                    wav_files.append(os.path.join(root, a_file))

        total = len(wav_files)
        for number, filename in enumerate(wav_files, 1):
            print("Adding mseed file %i of %i..." % (number, total))
            ds.add_waveforms(read(filename), tag="raw_recording")

        for filename in glob('%s/*' % sta_dir):
            ds.add_stationxml(filename)
    return
def extract_adjoint_stations(asdf, stations=None):
    """
    Extract station information from adjoint source group

    :param asdf: file name or an already opened pyasdf.ASDFDataSet
    :param stations: names of the adjoint sources to look at; all entries
        of the AdjointSources group when None
    :return: dict mapping station id to
        [latitude, longitude, elevation_in_m, depth_in_m], taken from the
        first adjoint source seen for that station; empty when the dataset
        has no AdjointSources group
    :raises TypeError: if asdf is neither a string nor a dataset
    """
    if isinstance(asdf, str):
        ds = pyasdf.ASDFDataSet(asdf, mode='r')
    elif isinstance(asdf, pyasdf.ASDFDataSet):
        ds = asdf
    else:
        raise TypeError("Input asdf either be a filename or "
                        "pyasdf.ASDFDataSet")

    sta_dict = {}
    try:
        adjsrcs = ds.auxiliary_data.AdjointSources
    except Exception:
        # Was a bare "except:", which also swallowed KeyboardInterrupt and
        # SystemExit. A missing group still yields an empty result.
        return {}

    if stations is None:
        stations = adjsrcs.list()

    for adj_name in stations:
        adj = getattr(adjsrcs, adj_name)
        pars = adj.parameters
        station_id = pars["station_id"]
        if station_id not in sta_dict:
            sta_dict[station_id] = [pars["latitude"], pars["longitude"],
                                    pars["elevation_in_m"],
                                    pars["depth_in_m"]]

    return sta_dict
def append(self, dsfid, srcrcv=True, windows=True):
    """
    Parse information from a pyasdf.ASDFDataSet file and append it to the
    current collection of information.

    An OSError while opening or reading the file, or an AttributeError
    while reading the windows, ends the call early; a message is printed
    only when ``self.verbose`` is set.

    :type dsfid: str
    :param dsfid: fid of the dataset
    :type srcrcv: bool
    :param srcrcv: gather source-receiver information
    :type windows: bool
    :param windows: gather window information
    """
    try:
        with pyasdf.ASDFDataSet(dsfid) as ds:
            if srcrcv:
                self._get_srcrcv_from_dataset(ds)
            if not windows:
                return
            try:
                self._get_windows_from_dataset(ds)
            except AttributeError:
                if self.verbose:
                    print("error reading dataset: "
                          "missing auxiliary data")
    except OSError:
        if self.verbose:
            print("error reading dataset: already open")
def extract_station_info_from_asdf(asdf, verbose=False):
    """
    Extract the sensor type from stationxml in asdf file

    :param asdf: file name or an already opened pyasdf.ASDFDataSet
    :param verbose: print a progress line per station when truthy
    :return: dict merged from the result of extract_staxml_info for every
        station; stations that fail are reported and skipped
    :raises TypeError: if asdf is neither a string nor a dataset
    """
    # `unicode` only exists on Python 2. Referencing it directly raised a
    # NameError on Python 3 as soon as a non-str argument (e.g. an opened
    # dataset) reached the second isinstance check.
    try:
        string_types = (str, unicode)  # noqa: F821 - Python 2 only
    except NameError:
        string_types = (str,)

    if isinstance(asdf, string_types):
        ds = pyasdf.ASDFDataSet(asdf, mode='r')
    elif isinstance(asdf, pyasdf.ASDFDataSet):
        ds = asdf
    else:
        raise TypeError("Input asdf either be a filename or "
                        "pyasdf.ASDFDataSet")

    asdf_sensors = dict()
    ntotal = len(ds.waveforms)
    for idx, st_group in enumerate(ds.waveforms):
        if verbose:
            print("[%4d/%d]Station: %s"
                  % (idx, ntotal, st_group._station_name))
        try:
            inv = st_group.StationXML
            info = extract_staxml_info(inv)
            asdf_sensors.update(info)
        except Exception as msg:
            # best effort: report and carry on with the next station
            print("Failed to extract due to: %s" % msg)
            continue

    print("Number of stations and channels: %d, %d"
          % (ntotal, len(asdf_sensors)))

    return asdf_sensors
def vibbox_to_asdf(files, inv, param_file):
    """
    Convert a list of vibbox files to ASDF files of the same name

    Each output lands in <data_folder>/<parent dir of input>/<name>.h5.
    Inputs whose output already exists, or whose read raises ValueError,
    are skipped.

    :param files: List of files to convert
    :param inv: Inventory object to add to asdf files
    :param param_file: path to yaml config file for DUG-seis
    :return:
    """
    # Load in the parameters
    with open(param_file, 'r') as f:
        # NOTE(review): FullLoader is meant for trusted configuration only;
        # confirm param_file never comes from an untrusted source.
        param = yaml.load(f, Loader=yaml.FullLoader)
    outdir = param['Acquisition']['asdf_settings']['data_folder']

    for afile in files:
        parts = afile.split('/')
        name = os.path.join(outdir, parts[-2],
                            parts[-1].replace('.dat', '.h5'))
        target_dir = os.path.dirname(name)
        if not os.path.isdir(target_dir):
            os.mkdir(target_dir)
        if os.path.isfile(name):
            print('{} already written'.format(name))
            continue
        print('Writing {} to {}'.format(afile, name))
        try:
            st = vibbox_read(afile, param)
        except ValueError as e:
            # Wrong number of samples?
            print(e)
            continue
        with pyasdf.ASDFDataSet(name, compression='gzip-3') as asdf:
            asdf.add_stationxml(inv)
            asdf.add_waveforms(st, tag='raw_recording')
    return
def obs_asdf_process(dir_observe,eventname,min_period,max_period):
    """Copy an observed ASDF file and band-pass its raw recordings.

    The copy is written to
    ``<eventname>preprocessed/preprocessed_<min>s_to_<max>s.h5``; an
    existing ``<eventname>preprocessed`` is removed first. For every
    station the ``raw_recording`` stream is detrended, tapered and
    filtered (the whole sequence is applied twice), stored under the tag
    ``preprocessed_<min>s_to_<max>s``, and the ``raw_recording`` and
    ``preprocess`` tags are deleted.

    Args:
        dir_observe: path of the ASDF file to copy.
        eventname: prefix of the output directory name.
        min_period, max_period: pass band as periods; the filter corners
            are 1/max_period and 1/min_period.
    """
    # local import: the block now needs shutil instead of shelling out
    import shutil

    ##the name of the file of the preprocessed data name and directory
    processdir=eventname+'preprocessed'
    tag_name='preprocessed_'+ str(min_period) +'s_to_'+str(max_period)+'s'
    processdata=tag_name+'.h5'
    print(processdata)

    # Filesystem calls replace os.system('rm -rf ...') / os.system('cp ...'):
    # those broke on paths with spaces or shell metacharacters, and a failed
    # copy went unnoticed.
    if os.path.exists(processdir):
        if os.path.isdir(processdir) and not os.path.islink(processdir):
            shutil.rmtree(processdir)
        else:
            os.remove(processdir)
    os.makedirs(processdir)
    target=processdir+'/'+processdata
    shutil.copy(dir_observe,target)

    with pyasdf.ASDFDataSet(target) as ds_obs:
        for stid in ds_obs.waveforms.list():
            st_obs=ds_obs.waveforms[stid].raw_recording
            # the processing chain is applied twice, as before
            for _ in range(2):
                st_obs.detrend("linear")
                st_obs.detrend("demean")
                st_obs.taper(0.05, type="cosine")
                st_obs.filter("bandpass", freqmin=1.0 / max_period,
                              freqmax=1.0 / min_period,
                              corners=3, zerophase=False)
            ds_obs.add_waveforms(st_obs,tag=tag_name)
            del ds_obs.waveforms[stid].raw_recording
            del ds_obs.waveforms[stid].preprocess
def queryByBBoxInterval(self, outputFileName, bbox, timeinterval, chan='*Z',
                        bbpadding=2, event_id=None, verbose=False):
    """
    Download reference-station data for a bounding box and time interval.

    Stations of self._network inside the padded box are requested from the
    client in windows of at most ten days. Inventory and traces (tag
    "reference_station") are added to the ASDF file outputFileName, and the
    full inventory is written next to it as meta.<basename>.xml.

    Time interval is a tuple (starttime,endtime)
    bbox is read as (minlon, maxlon, minlat, maxlat); bbpadding is
    subtracted from the minima and added to the maxima.
    event_id is accepted but not used.
    """
    assert len(timeinterval) == 2, "timeinterval must be a tuple of ascending timestamps. len=" + str(
        len(timeinterval)) + " " + str(timeinterval)

    query_ds = pyasdf.ASDFDataSet(outputFileName)

    client = Client(self._client)
    ref_inv = client.get_stations(network=self._network,
                                  starttime=UTCDateTime(timeinterval[0]),
                                  endtime=UTCDateTime(timeinterval[1]),
                                  minlongitude=bbox[0] - bbpadding,
                                  maxlongitude=bbox[1] + bbpadding,
                                  minlatitude=bbox[2] - bbpadding,
                                  maxlatitude=bbox[3] + bbpadding,
                                  level='channel')

    if verbose:
        print(ref_inv)

    # ten days, in seconds
    step = 3600*24*10

    # go through inventory and request timeseries data
    for net in ref_inv:
        for stn in net:
            stime = UTCDateTime(timeinterval[0])
            etime = UTCDateTime(timeinterval[1])

            # Walk the interval in ten-day windows, clipping the last one
            # at etime. The former condition (stime + step < etime) never
            # requested the final window and skipped intervals of ten days
            # or less entirely.
            while stime < etime:
                chunk_end = stime + step
                if chunk_end > etime:
                    chunk_end = etime
                try:
                    ref_st = client.get_waveforms(network=net.code,
                                                  station=stn.code,
                                                  channel=chan, location='*',
                                                  starttime=stime,
                                                  endtime=chunk_end)
                    # function-call form works on Python 2 and Python 3
                    print(ref_st)
                    self.ref_stations.append(net.code + '.' + stn.code)
                    st_inv = ref_inv.select(station=stn.code, channel=chan)

                    query_ds.add_stationxml(st_inv)
                    for tr in ref_st:
                        query_ds.add_waveforms(tr, "reference_station")
                except FDSNException:
                    print('Data not available from Reference Station: ' + stn.code)
                # end try
                stime += step
            # wend
        # end for
    # end for

    if verbose:
        print("Wrote Reference Waveforms to ASDF file: " + outputFileName)
        print('\nWaveform data query completed.')

    metaOutputFileName = os.path.join(os.path.dirname(outputFileName),
                                      'meta.%s.xml' % (os.path.basename(outputFileName)))
    ref_inv.write(metaOutputFileName, format="STATIONXML")
    del query_ds
def compare_c2_c3_waveforms(c2file,c3file,maxlag,c2_maxlag,dt):
    '''
    use data type from c3file to plot the waveform for c2 and c3
    note that the length of c3file is shorter than c2file

    c2file: HDF5 file for normal cross-correlation function
    c3file: HDF5 file for C3 function
    maxlag: maximum time lag for C3
    c2_maxlag: maximum time lag for C1
    dt: time increment

    one figure is shown per (data type, path) entry found in c3file
    '''
    # The arguments are now used as given. The body used to overwrite all
    # five of them with hard-coded debugging values, so whatever the caller
    # passed was ignored.

    #-------time axis-------
    tt = np.arange(-maxlag/dt, maxlag/dt+1)*dt
    tt_c2 = np.arange(-c2_maxlag/dt, c2_maxlag/dt+1)*dt
    # samples of the C2 axis that fall inside the C3 lag range
    ind = np.where(abs(tt_c2)<=-tt[0])[0]

    #-------make station pairs--------
    with pyasdf.ASDFDataSet(c2file,mode='r') as ds_c2, \
            pyasdf.ASDFDataSet(c3file,mode='r') as ds_c3:

        #------loop through all c3 data_types-------
        for data_type in ds_c3.auxiliary_data.list():
            for path in ds_c3.auxiliary_data[data_type].list():
                print(data_type,path)

                sta1 = data_type.split('s')[1]
                sta2 = path.split('s')[1]

                c3_waveform = ds_c3.auxiliary_data[data_type][path].data[:]
                c2_waveform = ds_c2.auxiliary_data[data_type][path].data[:]
                c1_waveform = c2_waveform[ind]

                plt.subplot(211)
                plt.plot(c3_waveform)
                plt.subplot(212)
                plt.plot(c1_waveform)
                # a bare string is split into one label per character;
                # wrap it in a list to label the single curve
                plt.legend([sta1+'_'+sta2],loc='upper right')
                plt.show()
def plot_multi_freq_stack(sfile,freqmin,freqmax,nfreq,ccomp,tags=None):
    '''
    plot the stacked ccfs for sta1-sta2 at multi-frequency bands between freqmin-freqmax.
    this may be useful to show the dispersive property of the surface waves, which could
    be used together with plot_ZH_pmotion to identify surface wave components

    sfile: path of the ASDF file
    freqmin, freqmax: end points of the log-spaced frequency grid
    nfreq: number of grid points; band ii runs from point ii-1 to point ii+1
    ccomp: component key read below tags
    tags:  auxiliary-data group, 'Allstacked' when falsy

    raises ValueError when tags is not a group of the file

    example: plot_multi_freq_stack('/Users/chengxin/Documents/Harvard/Kanto_basin/Mesonet_BW/STACK/E.AYHM/E.AYHM_E.BKKM.h5',0.08,6,15,'RR')
    '''
    #---basic parameters----
    maxlag = 100

    #-----check tags information------
    if not tags:
        tags = "Allstacked"

    #------set path and type-------------
    # context manager: the file is released on every path, including the
    # ValueError below, which used to skip the closing "del ds"
    with pyasdf.ASDFDataSet(sfile,mode='r') as ds:
        if tags not in ds.auxiliary_data.list():
            raise ValueError('tags %s not in the file %s' % (tags,sfile))

        #--------read the data and parameters------------
        parameters = ds.auxiliary_data[tags][ccomp].parameters
        corr = ds.auxiliary_data[tags][ccomp].data[:]
        dt  = parameters['dt']
        lag = parameters['lag']

    sampling_rate = int(1/dt)
    npts = int(2*sampling_rate*lag)
    indx = npts//2
    tt = np.arange(-maxlag*sampling_rate, maxlag*sampling_rate+1)/sampling_rate

    #------make frequency information-----
    freq = np.zeros(nfreq,dtype=np.float32)
    step = (np.log(freqmin)-np.log(freqmax))/(nfreq-1)

    for ii in range(nfreq):
        freq[ii]=np.exp(np.log(freqmin)-ii*step)

    indx0 = maxlag*sampling_rate
    #----loop through each freq-----
    for ii in range(1,nfreq-1):
        if ii==1:
            plt.figure(figsize=(9,6))

        f1 = freq[ii-1]
        f2 = freq[ii+1]
        ncorr = bandpass(corr,f1,f2,sampling_rate,corners=4,zerophase=True)
        ncorr = ncorr/max(ncorr)

        #------plot the signals-------
        plt.plot(tt,ncorr[indx-indx0:indx+indx0+1]+ii,'k-',linewidth=0.6)
        ttext = '{0:4.2f}-{1:4.2f} Hz'.format(f1,f2)
        plt.text(maxlag*0.9,ii+0.3,ttext,fontsize=6)
        if ii==1:
            plt.title('%s at %s component' % (sfile.split('/')[-1],ccomp))
            plt.xlabel('time [s]')
            plt.ylabel('waveform #')
    plt.grid(True)
    plt.show()
def _get_asdf_dataset(self):
    """Open ``self.asdf_path`` in append mode and return the dataset.

    Raises:
        Exception: wrapping any error raised while opening the dataset or
            logging the debug message.
    """
    try:
        dataset = pyasdf.ASDFDataSet(self.asdf_path, mode="a")
        self._log.debug("Opened ASDF dataset '{}'.".format(self.asdf_path))
        return dataset
    except Exception as err:
        message = "Failed to create ASDF file at '{}': {}.".format(
            self.asdf_path, err)
        raise Exception(message)
def process(input_asdf, output_folder, start_date, end_date, length):
    """
    INPUT_ASDF: Path to input ASDF file\n
    OUTPUT_FOLDER: Output folder \n

    Example usage:
    mpirun -np 2 python asdf2mseed.py ./test.asdf /tmp/output --start-date 2013-01-01T00:00:00 --end-date 2016-01-01T00:00:00

    Note:
    """
    try:
        start_date = UTCDateTime(start_date).timestamp if start_date else None
        end_date = UTCDateTime(end_date).timestamp if end_date else None
        length = int(length)
    except Exception:
        # Same exception type as the former "assert 0", but not stripped
        # under -O and no longer triggered by KeyboardInterrupt/SystemExit.
        raise AssertionError('Invalid input')
    # end try

    comm = MPI.COMM_WORLD
    nproc = comm.Get_size()
    rank = comm.Get_rank()
    proc_stations = defaultdict(list)

    ds = pyasdf.ASDFDataSet(input_asdf)

    if rank == 0:
        # split work over stations in ds1 for the time being

        # Query the coordinates once. The keys are turned into a list
        # because a dict view cannot be indexed on Python 3.
        meta = ds.get_all_coordinates()
        stations = list(meta.keys())

        # Integer split: every rank gets per_proc stations, the first
        # `leftover` ranks one more. (np.divide is true division on
        # Python 3 and produced a float loop bound.)
        per_proc, leftover = divmod(len(stations), nproc)

        count = 0
        for iproc in range(nproc):
            for _ in range(per_proc):
                proc_stations[iproc].append(stations[count])
                count += 1
        # end for
        for iproc in range(leftover):
            proc_stations[iproc].append(stations[count])
            count += 1
        # end for

        # output station meta-data
        fn = os.path.join(output_folder, 'stations.txt')
        with open(fn, 'w+') as f:
            f.write('#Station\t\tLongitude\t\tLatitude\n')
            for sn in stations:
                f.write('%s\t\t%f\t\t%f\n'
                        % (sn, meta[sn]['longitude'], meta[sn]['latitude']))
            # end for
        # end with
    # end if

    # broadcast workload to all procs
    proc_stations = comm.bcast(proc_stations, root=0)

    # function-call form works on both Python 2 and Python 3
    print(proc_stations)

    dump_traces(ds, proc_stations[rank], start_date, end_date, length,
                output_folder)