def main(obs_path, ref_path, pkl_path):
    results = None
    with pyasdf.ASDFDataSet(obs_path, mode="a") as obs_ds:
        with pyasdf.ASDFDataSet(ref_path, mode="r") as ref_ds:
            event = obs_ds.events[0]
            origin = event.preferred_origin() or event.origins[0]
            evla = origin.latitude
            evlo = origin.longitude
            evdp = origin.depth / 1000

            # kernel function
            def process(sg_obs, sg_ref):
                waveform_tags = sg_obs.get_waveform_tags()
                inv_obs = sg_obs["StationXML"]
                station_info = inv_obs.get_contents()['stations'][0]

                # should have only one tag after we have simplified the ASDF file
                tag_obs = waveform_tags[0]
                st_obs = sg_obs[tag_obs]

                # property times
                stla = inv_obs[0][0].latitude
                stlo = inv_obs[0][0].longitude
                property_times = get_property_times(stla, stlo, evla, evlo,
                                                    evdp)

                return property_times

            results = obs_ds.process_two_files_without_parallel_output(
                ref_ds, process)

            if isroot:  # assumed to be defined elsewhere, e.g. MPI rank == 0
                with open(pkl_path, 'wb') as handle:
                    pickle.dump(results, handle)
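# Usage sketch: a minimal hypothetical driver for main() above; the paths are
# placeholders, and `isroot`/`get_property_times` are assumed to be defined at module level.
if __name__ == "__main__":
    main(obs_path="obs.h5", ref_path="ref.h5", pkl_path="property_times.pkl")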
Example #2
def plot_SNR_distance(sdir,ccomp,lag):
    '''
    show the statistics of how SNR varies with distance for the stacked cross-correlations
    '''
    #------all station pairs-------
    sfiles = glob.glob(os.path.join(sdir,'*.h5'))
    tags = 'Allstacked'
    if lag==0:
        snr_para = 'ssnr'
    elif lag==-1:
        snr_para = 'nsnr'
    else:
        snr_para = 'psnr'
    
    #-----read in freq--------
    with pyasdf.ASDFDataSet(sfiles[0],mode='r') as ds:
        freq = ds.auxiliary_data[tags][ccomp].parameters['freq']
    print(freq)

    #---loop through each station-pair----
    for sfile in sfiles:
        with pyasdf.ASDFDataSet(sfile,mode='r') as ds:
            dist = ds.auxiliary_data[tags][ccomp].parameters['dist']
            snr = ds.auxiliary_data[tags][ccomp].parameters[snr_para]
            # one panel per SNR element; c=0 is not a valid matplotlib color, use black
            for isub, isnr in enumerate([1, 2, 4, 6, 7, 8, 9, 10, 11]):
                plt.subplot(331 + isub)
                plt.scatter(dist, snr[isnr], marker='o', s=25, color='k')
    plt.subplot(331)
    plt.ylabel('SNR')
    plt.subplot(334)
    plt.ylabel('SNR')
    plt.subplot(337)
    plt.ylabel('SNR')
    plt.xlabel('distance [km]')
    plt.subplot(338)
    plt.xlabel('distance [km]')
    plt.subplot(339)
    plt.xlabel('distance [km]')
    plt.subplot(332)
    plt.title('%s, %s km' % (sdir.split('/')[-1], dist))
    plt.show()
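# Usage sketch: hypothetical stack directory; lag=0 selects the symmetric-lag SNR ('ssnr').
plot_SNR_distance('/path/to/STACK/E.AYHM', 'ZZ', lag=0)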
Example #3
def select_windows(parameters, paths, merge_flag, obs_tag, syn_tag):
    """
    Selects windows by comparing observed and synthetic traces;
    writes results to a json file

    :param parameters: dictionary passed directly to pyflex.Config
    :param paths.obs: ASDF observed data filename
    :param paths.syn: ASDF synthetic data filename
    :param paths.output: windows will be written to a JSON file with this name
    :param paths.log: information about the quantity and quality of windows
        will be written to a JSON file with this name
    :param obs_tag: observed data are read using this ASDF tag
    :param syn_tag: synthetic data are read using this ASDF tag
    """
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.rank

    cwd = dirname(__file__)

    # read data
    fullpath = join(cwd, paths.obs)
    obs = pyasdf.ASDFDataSet(fullpath, compression=None, mode="a")
    event = obs.events[0]

    # read synthetics
    fullpath = join(cwd, paths.syn)
    syn = pyasdf.ASDFDataSet(fullpath, compression=None, mode="a")

    # generate pyflex.Config objects
    config = {}
    for channel, param in parameters.items():
        config[channel] = pyflex.Config(**param)

    # wrapper is required for ASDF processing
    def wrapped_function(obs, syn):
        return pytomo3d.window.window_on_stream(obs[obs_tag],
                                                syn[syn_tag],
                                                config,
                                                station=obs.StationXML,
                                                event=event,
                                                user_modules=None,
                                                figure_mode=False,
                                                figure_dir=None,
                                                _verbose=False)

    # run window selection
    windows = obs.process_two_files_without_parallel_output(
        syn, wrapped_function)

    # save results
    if rank == 0:
        if merge_flag:
            windows = merge(windows)
        write_windows_json(paths.output, windows)
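# Usage sketch: `paths` can be any object exposing obs/syn/output/log attributes;
# the pyflex.Config keyword values below are placeholders, not project defaults.
from types import SimpleNamespace
paths = SimpleNamespace(obs='obs.h5', syn='syn.h5',
                        output='windows.json', log='windows_log.json')
parameters = {'Z': {'min_period': 40.0, 'max_period': 100.0}}
select_windows(parameters, paths, merge_flag=False,
               obs_tag='observed', syn_tag='synthetic')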
Example #4
def combine_adjoint_sources(paths, tag, rotate=True, auxiliary_data=False):
    """ 
    Sums adjoint sources from different ASDF files in a linear combination with
    user-supplied weights. Can be useful, for example, if previous data 
    processing, window selection, and misfit measurements steps were carried out
    separately for different pass bands (e.g. long period, short period) or 
    different phases (e.g. body waves, surface waves).

    :param paths.input: ASDF files containing adjoint sources
    :type paths.input: list
    :param paths.weights: corresponding list of JSON files containing weights
    :type paths.weights: list
    :param paths.output: summed adjoint sources are written to this ASDF file
    :type paths.output: str
    """

    cwd = dirname(__file__)

    adjoint_sources_all = []
    weights_all = []

    for filenames in zip_catch(paths.input, paths.weights):
        # read adjoint sources
        fullname = join(cwd, filenames[0])
        adjoint_sources_all += [
            pyasdf.ASDFDataSet(fullname, mpi=False, mode='r')
        ]

        # read user-supplied weights
        fullname = join(cwd, filenames[1])
        weights_all += [read_json(fullname)]

    # create output file
    shutil.copy(paths.input[0], paths.output)
    adjoint_sources_sum = pyasdf.ASDFDataSet(paths.output, mpi=False, mode="a")

    # overwrite output file data with zeros
    for waveform in adjoint_sources_sum.waveforms:
        for trace in waveform[tag]:
            trace.data[:] = 0.

    # weighted sum of adjoint sources
    for weights, adjoint_sources in zip(weights_all, adjoint_sources_all):
        for station in weights:
            for trace1, trace2 in \
                zip(adjoint_sources_sum.waveforms[station][tag],
                    adjoint_sources.waveforms[station][tag]):
                trace1.data += weights[station][trace1.id] * trace2.data

    # write misfit (not yet implemented)
    pass
Example #5
    def __init__(self, filename, compression=None):
        """Create an ASDF file given an Event and list of StationStreams.

        Args:
            filename (str):
                Path to ASDF file to create.
            compression (str):
                Any value supported by pyasdf.asdf_data_set.ASDFDataSet.
        """
        if os.path.exists(filename):
            self.dataset = pyasdf.ASDFDataSet(filename)
        else:
            self.dataset = pyasdf.ASDFDataSet(
                filename, compression=compression)
        self.FORMAT_VERSION = FORMAT_VERSION
Example #6
def plot_cc_withtime_stack(ccffile,freqmin,freqmax,ccomp,maxlag=None):
    '''
    plot the filtered cross-correlation functions between station pair sta1-sta2
    for all of the available days stored in ccffile

    example: plot_cc_withtime('/Users/chengxin/Documents/Harvard/Kanto_basin/Mesonet_BW/STACK/E.AYHM/E.AYHM_E.ENZM.h5',1,5,'ZZ',50) 
    '''
    #---basic parameters----
    if not maxlag:
        maxlag = 100

    #-----check whether file exists------
    if os.path.isfile(ccffile):
        with pyasdf.ASDFDataSet(ccffile,mode='r') as ds:
            rlist = ds.auxiliary_data['Allstacked'].list()
            if not rlist:
                raise ValueError('no data stacked for %s'%ccffile)
            dt = ds.auxiliary_data['Allstacked'][rlist[0]].parameters['dt']
            tt = np.arange(-maxlag/dt, maxlag/dt+1)*dt
            dist = ds.auxiliary_data['Allstacked'][rlist[0]].parameters['dist']

            #-----loop through each day-----
            slist = ds.auxiliary_data.list()

            for ii in range(len(slist)-1):
                if ii%60==0:
                    plt.figure(figsize=(9,6))

                iday = slist[ii]

                #-----in case there is no data on that day-----
                try:
                    data = ds.auxiliary_data[iday][ccomp].data[:]
                    data = bandpass(data,freqmin,freqmax,int(1/dt),corners=4, zerophase=True)
                    
                    #--normalize the data----
                    data = data/max(data)

                except Exception:
                    data = np.zeros(tt.shape,dtype=np.float32)
                
                npts = len(data)                
                #----make index----
                indx0 = npts//2
                tindx = int(maxlag/dt)
                if ii==0:
                    color = 'b-'
                else:
                    color = 'k-'
                plt.plot(tt,data[indx0-tindx:indx0+tindx+1]+ii*2,color,linewidth=0.5)
                plt.text(maxlag*0.7,ii*2,iday,fontsize=6)

            plt.grid(True)

            #---------highlight zero time------------
            plt.plot([0,0],[0,ii*2],'r--',linewidth=1.5)
            plt.title('%s %s, dist:%6.1fkm @%4.1f-%4.1f Hz' % (ccffile.split('/')[-1],ccomp,dist,freqmin,freqmax))
            plt.xlabel('time [s]')
            plt.ylabel('days')
            plt.show()
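# Usage sketch, mirroring the docstring example above:
plot_cc_withtime_stack('/Users/chengxin/Documents/Harvard/Kanto_basin/Mesonet_BW/STACK/E.AYHM/E.AYHM_E.ENZM.h5',
                       1, 5, 'ZZ', 50)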
Example #7
def load_corr(corr_h5,comp):
    """
    Load correlations into numpy array. Prepares for input into MWCS.

    :type corr_h5: str
    :param corr_h5: path/filename (/Volumes/.../../~.h5) of the ASDF data set holding the cross-correlations. Must end in .h5
    :param comp: Components used in cross-correlation, e.g. 'ZZ', 'RT', 'TT'
    """
    
    # query dataset 
    net_sta = os.path.basename(corr_h5).replace('.h5','')
    
    with pyasdf.ASDFDataSet(corr_h5, mpi=False, mode='r') as ds:
        corrs = ds.auxiliary_data.CrossCorrelation[net_sta][comp].list()

        # data to return 
        all_param = []
        all_data = []

        for corr in corrs:
            data = ds.auxiliary_data.CrossCorrelation[net_sta][comp][corr].data
            param = ds.auxiliary_data.CrossCorrelation[net_sta][comp][corr].parameters
            all_data.append(np.array(data))
            all_param.append(param)
        all_data = np.vstack(all_data)
        days = [c[-10:].replace('_','/') for c in corrs]

    return all_data, all_param, days, net_sta
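# Usage sketch: hypothetical file name; the basename (minus .h5) must match the
# station-pair key stored under auxiliary_data.CrossCorrelation.
all_data, all_param, days, net_sta = load_corr('/path/to/NET1STA1_NET2STA2.h5', 'ZZ')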
Example #8
 def ASDFmodel(self, infname, per=10., phgr=1, verbose=True, Dx=0., Dz=0.):
     """
     Read ASDF model
     =============================================================================
     Input Parameters:
     infname   - input file name
     per       - period
     phgr      - use phase(1) or group(2) velocity
     =============================================================================
     """
     dbase = pyasdf.ASDFDataSet(infname)
     if verbose:
         print(dbase.auxiliary_data.Disp)
     perArr = dbase.auxiliary_data.Disp.VP000.data.value[0, :]
     vArr = dbase.auxiliary_data.Disp.VP000.data.value[phgr, :]
     if not np.any(perArr == per):
         raise ValueError('Period %g sec not in the theoretical dispersion curve!' % per)
     Vs0 = vArr[perArr == per] * 1000.
     self.setbackground(vs=Vs0)
     if self.plotflag: self.VsArrPlot[:] = Vs0
     for auxid in dbase.auxiliary_data.Disp.list()[1:]:
         perArr = dbase.auxiliary_data.Disp[auxid].data.value[0, :]
         vArr = dbase.auxiliary_data.Disp[auxid].data.value[phgr, :]
         Rmax = dbase.auxiliary_data.Disp[auxid].parameters['Rmax']
         Rmin = dbase.auxiliary_data.Disp[auxid].parameters['Rmin']
         x = dbase.auxiliary_data.Disp[auxid].parameters['x']
         y = dbase.auxiliary_data.Disp[auxid].parameters['y']
         Vs = vArr[perArr == per] * 1000.
         self.RingHomoAnomaly(Xc=x + Dx,
                              Zc=y + Dz,
                              Rmax=Rmax,
                              Rmin=Rmin,
                              va=Vs)
     return
Example #9
def correlate():
    # Create output directory, if necessary

    outdir = os.path.join('data', 'correlations')

    if rank == 0 and not os.path.exists(outdir):
        os.mkdir(outdir)

    comm.Barrier()

    # Create own output directory, if necessary

    #rankdir = os.path.join(outdir,'rank_%g' %rank)
    #if not os.path.exists(rankdir):
    #    os.mkdir(rankdir)

    # correlation report file

    output_file = os.path.join(outdir, 'correlation_report_rank%g.txt' % rank)

    if os.path.exists(output_file):
        ofid = open(output_file, 'a')
        print('Resuming correlation job, Date:', file=ofid)
        print(time.strftime('%Y.%m.%dT%H:%M'), file=ofid)
    else:
        ofid = open(output_file, 'w')
        print('Correlation job, Date:', file=ofid)
        print(time.strftime('%Y.%m.%dT%H:%M'), file=ofid)

    # - get list of files available;
    # - get blocks of channel pairs

    c = correlation_inventory(cfg)

    # - LOOP over blocks:

    for b in c.blocks[rank::size]:

        block = deepcopy(b)
        # initialize a block of correlations
        corr_block = CorrBlock(block, cfg)
        corr_block.run(output_file=ofid)


    # - append all the stationxmls to the asdf file, if asdf output is chosen

    if cfg.format_output.upper() == "ASDF" and rank == 0:
        filename = os.path.join('data', 'correlations', 'correlations.h5')

        with pyasdf.ASDFDataSet(filename) as ds:

            stations = []
            for cha in ds.auxiliary_data.CrossCorrelation.list():
                stations.append(cha.split('_')[0] + '.' + cha.split('_')[1])

            stations = set(stations)

            for sta in stations:
                ds.add_stationxml(
                    os.path.join('meta', 'stationxml', '%s.xml' % sta))
Example #10
 def read_asdf_kernel(self):
     if self.asdf_path is None:
         return
     if isfile(self.asdf_path):
         # since virasdf will not use parallel I/O, we disable MPI here
         with pyasdf.ASDFDataSet(self.asdf_path, mode="r", mpi=False) as ds:
             self.events = ds.events
             self.waveforms_list = ds.waveforms.list()
             for each_net_sta in self.waveforms_list:
                 wg = ds.waveforms[each_net_sta]
                 self.tag = wg.get_waveform_tags()[0]
                 if not self.usecopy:
                     self.waveforms[each_net_sta] = {
                         "inv": wg["StationXML"],
                         "st": wg[self.tag]
                     }
                 else:
                     self.waveforms[each_net_sta] = {
                         "inv": wg["StationXML"].copy(),
                         "st": wg[self.tag].copy()
                     }
                 # it's better to use float32
                 for tr in self.waveforms[each_net_sta]["st"]:
                     tr.data = np.require(tr.data, dtype="float32")
     else:
         pass
Example #11
def custom_get_waveforms(network,
                         station,
                         location,
                         channel,
                         starttime,
                         endtime,
                         quality=None,
                         minimumlength=None,
                         longestonly=None,
                         filename=None,
                         attach_response=False,
                         **kwargs):
    with pyasdf.ASDFDataSet(
            '/g/data/ha3/Passive/_ANU/7D(2012-2013)/ASDF/7D(2012-2013).h5',
            mode='r') as asdfDataSet:
        st = Stream()
        #ignoring channel for now as all the 7D network waveforms have only BH? channels
        filteredList = [
            i for i in asdfDataSet.waveforms[network + '.' + station].list()
            if 'raw_recording' in i and UTC(i.split("__")[1]) < starttime
            and UTC(i.split("__")[2]) > endtime
        ]
        for t in filteredList:
            st += asdfDataSet.waveforms[network + '.' + station][t]
        return st
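# Usage sketch: hypothetical query; `UTC` is the UTCDateTime alias used above,
# and the station code is a placeholder.
st = custom_get_waveforms('7D', 'CZ40', '', 'BHZ',
                          UTC('2012-06-01T00:00:00'), UTC('2012-06-01T01:00:00'))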
Example #12
def plot_spectrum2(sfile,iday,icomp,freqmin,freqmax):
    '''
    this script plots the noise spectrum for day iday on component icomp (results from step1)
    and compares it with the waveforms in the time domain
    '''
    dt = 0.05
    sta = sfile.split('/')[-1].split('.')[1]
    ds = pyasdf.ASDFDataSet(sfile,mode='r')
    comp = ds.auxiliary_data.list()
    
    #--check whether it exists----
    if icomp in comp:
        tlist = ds.auxiliary_data[icomp].list()
        if iday in tlist:
            spect = ds.auxiliary_data[icomp][iday].data[:]

            #---look at hourly----
            if spect.ndim==2:
                nfft = spect.shape[1]
                plt.title('station %s %s @ %s' % (sta,icomp,iday))
                for ii in range(spect.shape[0]):
                    waveform = np.real(np.fft.irfft(spect[ii])[0:nfft])
                    waveform = bandpass(waveform,freqmin,freqmax,int(1/dt),corners=4, zerophase=True)
                    waveform = waveform*0.5/max(waveform)
                    plt.plot(np.arange(0,nfft)*dt,waveform+ii,'k-')
                    #plt.plot([0,nfft*dt],[ii,ii],'r--',linewidth=0.2)
                plt.show()
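# Usage sketch: hypothetical file/day/component; the file name must look like
# X.STA.h5 so that sfile.split('/')[-1].split('.')[1] recovers the station code.
plot_spectrum2('/path/to/E.AYHM.h5', '2010_01_11', 'EHZ', 0.5, 4)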
Example #13
def retrieve_events_information(dir_observe):
    ds = pyasdf.ASDFDataSet(dir_observe, mode="r")
    event = ds.events[0]
    datetime = event.origins[0].time
    evlo = event.origins[0].longitude
    evla = event.origins[0].latitude
    evdp = event.origins[0].depth / 1000.
    otime = event.origins[0].time
    station_list = ds.waveforms.list()
    focal_mechanism = event.focal_mechanisms[0]['moment_tensor']
    m0 = focal_mechanism['scalar_moment']
    mrr = focal_mechanism['tensor']['m_rr'] / m0
    mtt = focal_mechanism['tensor']['m_tt'] / m0
    mpp = focal_mechanism['tensor']['m_pp'] / m0
    mrt = focal_mechanism['tensor']['m_rt'] / m0
    mrp = focal_mechanism['tensor']['m_rp'] / m0
    mtp = focal_mechanism['tensor']['m_tp'] / m0

    parameters1 = event.focal_mechanisms[0]['nodal_planes']['nodal_plane_1']
    strike_nodal_1 = parameters1.strike
    dip_nodal_1 = parameters1.dip
    rake_nodal_1 = parameters1.rake
    m_rr_init, m_tt_init, m_pp_init, m_rp_init, m_rt_init, m_tp_init = functions.DC_MT(
        strike_nodal_1, dip_nodal_1, rake_nodal_1)
    m_init_fullMT = np.array([mrr, mtt, mpp, mrt, mrp, mtp])
    m_init_DC = np.array(
        [m_rr_init, m_tt_init, m_pp_init, m_rt_init, m_rp_init, m_tp_init])
    return parameters1, m_init_fullMT, m_init_DC
Example #14
def process_single_event(min_periods, max_periods, taper_tmin_tmax, asdf_filename, waveform_length, sampling_rate, output_directory):
    with pyasdf.ASDFDataSet(asdf_filename, mode="r") as ds:
        # some parameters
        event = ds.events[0]
        origin = event.preferred_origin() or event.origins[0]
        event_time = origin.time

        for min_period, max_period in zip(min_periods, max_periods):
            # inv will be None if no inv provided
            def process_function(st, inv):  # pylint: disable=unused-argument
                # wrap process_sync_single_trace
                st = process_sync_single_trace(
                    st, event_time, waveform_length, taper_tmin_tmax, min_period, max_period, sampling_rate)  # pylint: disable=cell-var-from-loop

                return st
            tag_name = "preprocessed_%is_to_%is" % (
                int(min_period), int(max_period))
            tag_map = {
                "synthetic": tag_name
            }

            output_name_head = asdf_filename.split("/")[-1].split(".")[0]
            ds.process(process_function, join(
                output_directory, output_name_head+"."+tag_name + ".h5"), tag_map=tag_map)
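# Usage sketch: all values are placeholders; taper_tmin_tmax is passed through to
# process_sync_single_trace, so its expected format is project-specific.
process_single_event(min_periods=[10], max_periods=[40],
                     taper_tmin_tmax="10,20,3580,3590", asdf_filename="sync.h5",
                     waveform_length=3600, sampling_rate=1.0,
                     output_directory="./processed")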
Example #15
def generateStationTestData(sta):

    time_range = (UTCDateTime(TIME_RANGE[0]), UTCDateTime(TIME_RANGE[1]))

    client = Client("IRIS")
    inv = client.get_stations(network=NETWORK,
                              station=sta,
                              channel=CHANNEL,
                              starttime=time_range[0],
                              endtime=time_range[1],
                              level='channel')
    print(inv)

    traces = client.get_waveforms(network=NETWORK,
                                  station=sta,
                                  channel=CHANNEL,
                                  location='*',
                                  starttime=time_range[0],
                                  endtime=time_range[1])
    print(traces)

    outfile = 'test_data_' + sta + '.h5'
    with pyasdf.ASDFDataSet(outfile, mode='w') as asdf_out:
        asdf_out.add_stationxml(inv)
        asdf_out.add_waveforms(traces, TAG)

    print("Saved data to " + outfile)
Example #16
def add_to_adsf_file(filename, folder, tag, verbose=False):
    files = [os.path.join(folder, _i) for _i in os.listdir(folder)]
    if verbose:
        print("Found %i potential files to add." % len(files))
    print("Opening '%s' ..." % filename)
    with pyasdf.ASDFDataSet(filename) as f:
        _add_to_adsf_file(f=f, files=files, tag=tag, verbose=verbose)
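# Usage sketch: hypothetical archive and folder names.
add_to_adsf_file('archive.h5', './waveform_folder', tag='raw_recording', verbose=True)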
Example #17
    def __init__(self,
                 asdf_source,
                 logger=None,
                 single_item_read_limit_in_mb=1024):
        """
        :param asdf_source: path to a text file containing a list of ASDF files:
               Entries can be commented out with '#'
        :param logger: logger instance
        """

        self.comm = MPI.COMM_WORLD
        self.nproc = self.comm.Get_size()
        self.rank = self.comm.Get_rank()

        self.logger = logger
        self.asdf_source = None
        self.asdf_file_names = []
        self.asdf_station_coordinates = []

        if isinstance(asdf_source, str):
            self.asdf_source = asdf_source
            with open(self.asdf_source) as f:
                contents = f.read()
            self.source_sha1 = hashlib.sha1(contents.encode('utf-8')).hexdigest()
            fileContents = list(filter(len, contents.splitlines()))

            # collate file names
            for line in fileContents:
                if line.startswith('#'):
                    continue  # filter commented lines

                self.asdf_file_names.append(line.strip())
            # end for
        else:
            raise ValueError('Invalid value for asdf_source..')
        # end if

        self.asdf_datasets = []
        for ifn, fn in enumerate(self.asdf_file_names):
            if self.logger: self.logger.info('Opening ASDF file %s..' % (fn))

            if os.path.exists(fn):
                ds = pyasdf.ASDFDataSet(fn, mode='r')
                ds.single_item_read_limit_in_mb = single_item_read_limit_in_mb
                self.asdf_datasets.append(ds)
                self.asdf_station_coordinates.append(defaultdict(list))
            else:
                raise FileNotFoundError('File not found: %s..' % fn)
            # end if
        # end for

        # Create database
        self.conn = None
        self.db_fn = os.path.join(os.path.dirname(self.asdf_source),
                                  self.source_sha1 + '.db')
        self.create_database()

        atexit.register(self.cleanup)  # needed for closing asdf files at exit
Example #18
def d_syb_build(dir_direct, list_input, windows):
    ds_obs = pyasdf.ASDFDataSet(dir_direct, mode="r")
    size = len(list_input)
    length = np.zeros(size)
    for i in np.arange(size):
        # NOTE: this allocates one extra sample per window relative to the slices below
        length[i] = windows[i][1] - windows[i][0] + 1
    length = np.sum(length)
    print(length)
    d_Z = np.zeros((1, int(length)), dtype=float)
    d_E = np.zeros((1, int(length)), dtype=float)
    d_N = np.zeros((1, int(length)), dtype=float)
    N_index_left = 0
    N_index_right = 0
    index_left = np.array([], dtype=int)
    index_right = np.array([], dtype=int)
    for _i, stid in enumerate(list_input):
        st_obs_A = ds_obs.waveforms[stid].displacement
        st_obs_ROTA_A_Z = st_obs_A.select(component="Z")[0].data
        st_obs_ROTA_A_E = st_obs_A.select(component="E")[0].data
        st_obs_ROTA_A_N = st_obs_A.select(component="N")[0].data
        N_index_right += windows[_i][1] - windows[_i][0]
        index_left = np.append(index_left, N_index_left)
        index_right = np.append(index_right, N_index_right)
        d_Z[0][N_index_left:N_index_right] = st_obs_ROTA_A_Z[
            windows[_i][0]:windows[_i][1]]
        d_E[0][N_index_left:N_index_right] = st_obs_ROTA_A_E[
            windows[_i][0]:windows[_i][1]]
        d_N[0][N_index_left:N_index_right] = st_obs_ROTA_A_N[
            windows[_i][0]:windows[_i][1]]
        N_index_left += windows[_i][1] - windows[_i][0]
        print(N_index_left)
        print(N_index_left)
    return d_Z, d_E, d_N, index_left, index_right
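# Usage sketch: `windows` holds (start_sample, end_sample) pairs, one per station id;
# the file and station ids are placeholders.
windows = [(100, 500), (200, 800)]
d_Z, d_E, d_N, iL, iR = d_syb_build('obs.h5', ['IU.ANMO', 'IU.HRV'], windows)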
Example #19
def extract_waveform_stations(asdf, stations=None):
    """
    Extract station information from the waveform group
    """
    if isinstance(asdf, str):
        ds = pyasdf.ASDFDataSet(asdf, mode='r')
    elif isinstance(asdf, pyasdf.ASDFDataSet):
        ds = asdf
    else:
        raise TypeError("Input asdf either be a filename or "
                        "pyasdf.ASDFDataSet")

    sta_dict = {}
    if stations is None:
        stations = ds.waveforms.list()
    for st_id in stations:
        station_group = getattr(ds.waveforms, st_id)
        if "StationXML" not in dir(station_group):
            continue
        staxml = getattr(station_group, "StationXML")
        sta_dict[st_id] = [
            staxml[0][0].latitude, staxml[0][0].longitude,
            staxml[0][0].elevation, staxml[0][0][0].depth
        ]

    return sta_dict
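# Usage sketch: returns {'NET.STA': [latitude, longitude, elevation, depth]}.
sta_dict = extract_waveform_stations('data.h5')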
Example #20
def asdf_create(asdf_name, wav_dirs, sta_dir):
    """
    Wrapper on ASDFDataSet to create a new HDF5 file which includes
    all waveforms in a directory and stationXML directory
    :param asdf_name: Full path to new asdf file
    :param wav_dirs: List of directories of waveform files (will grab all files)
    :param sta_dir: Directory of stationXML files (will grab all files)
    :return:
    """

    with pyasdf.ASDFDataSet(asdf_name) as ds:
        wav_files = []
        for wav_dir in wav_dirs:
            wav_files.extend([os.path.join(root, a_file)
                              for root, dirs, files in os.walk(wav_dir)
                              for a_file in files])
        for _i, filename in enumerate(wav_files):
            print("Adding mseed file %i of %i..." % (_i+1, len(wav_files)))
            st = read(filename)
            # Add waveforms
            ds.add_waveforms(st, tag="raw_recording")
        sta_files = glob('%s/*' % sta_dir)
        for filename in sta_files:
            ds.add_stationxml(filename)
    return
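# Usage sketch: hypothetical directory layout with raw waveform and StationXML files.
asdf_create('archive.h5', ['/data/mseed/2019', '/data/mseed/2020'], '/data/stationxml')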
Example #21
def extract_adjoint_stations(asdf, stations=None):
    """
    Extract station information from adjoint source group
    """
    if isinstance(asdf, str):
        ds = pyasdf.ASDFDataSet(asdf, mode='r')
    elif isinstance(asdf, pyasdf.ASDFDataSet):
        ds = asdf
    else:
        raise TypeError("Input asdf either be a filename or "
                        "pyasdf.ASDFDataSet")

    sta_dict = {}
    try:
        adjsrcs = ds.auxiliary_data.AdjointSources
    except Exception:
        return {}

    if stations is None:
        stations = adjsrcs.list()
    for adj_name in stations:
        adj = getattr(adjsrcs, adj_name)
        pars = adj.parameters
        station_id = pars["station_id"]
        if station_id not in sta_dict:
            sta_dict[station_id] = [
                pars["latitude"], pars["longitude"], pars["elevation_in_m"],
                pars["depth_in_m"]
            ]

    return sta_dict
Example #22
    def append(self, dsfid, srcrcv=True, windows=True):
        """
        Simple function to parse information from a
        pyasdf.asdf_data_set.ASDFDataSet file and append it to the current
        collection of information.

        :type dsfid: str
        :param dsfid: fid of the dataset
        :type srcrcv: bool
        :param srcrcv: gather source-receiver information
        :type windows: bool
        :param windows: gather window information
        """
        try:
            with pyasdf.ASDFDataSet(dsfid) as ds:
                if srcrcv:
                    self._get_srcrcv_from_dataset(ds)
                if windows:
                    try:
                        self._get_windows_from_dataset(ds)
                    except AttributeError:
                        if self.verbose:
                            print("error reading dataset: "
                                  "missing auxiliary data")
                return
        except OSError:
            if self.verbose:
                print(f"error reading dataset: already open")
            return
Example #23
def extract_station_info_from_asdf(asdf, verbose=False):
    """ extract the sensor type from stationxml in asdf file """
    if isinstance(asdf, (str, unicode)):  # Python 2: accepts both string types
        ds = pyasdf.ASDFDataSet(asdf, mode='r')
    elif isinstance(asdf, pyasdf.ASDFDataSet):
        ds = asdf
    else:
        raise TypeError("Input asdf either be a filename or "
                        "pyasdf.ASDFDataSet")

    asdf_sensors = dict()
    ntotal = len(ds.waveforms)
    for idx, st_group in enumerate(ds.waveforms):
        if verbose:
            print("[%4d/%d]Station: %s" %
                  (idx, ntotal, st_group._station_name))
        try:
            inv = st_group.StationXML
            info = extract_staxml_info(inv)
            asdf_sensors.update(info)
        except Exception as msg:
            print("Failed to extract due to: %s" % msg)
            continue

    print("Number of stations and channels: %d, %d" %
          (ntotal, len(asdf_sensors)))

    return asdf_sensors
Example #24
File: gmug.py Project: cjhopp/scripts
def vibbox_to_asdf(files, inv, param_file):
    """
    Convert a list of vibbox files to ASDF files of the same name

    :param files: List of files to convert
    :param inv: Inventory object to add to asdf files
    :param param_file: path to yaml config file for DUG-seis

    :return:
    """
    # Load in the parameters
    with open(param_file, 'r') as f:
        param = yaml.load(f, Loader=yaml.FullLoader)
    outdir = param['Acquisition']['asdf_settings']['data_folder']
    for afile in files:
        name = os.path.join(outdir,
                            afile.split('/')[-2],
                            afile.split('/')[-1].replace('.dat', '.h5'))
        if not os.path.isdir(os.path.dirname(name)):
            os.mkdir(os.path.dirname(name))
        if os.path.isfile(name):
            print('{} already written'.format(name))
            continue
        print('Writing {} to {}'.format(afile, name))
        try:
            st = vibbox_read(afile, param)
        except ValueError as e:  # Wrong number of samples?
            print(e)
            continue
        with pyasdf.ASDFDataSet(name, compression='gzip-3') as asdf:
            asdf.add_stationxml(inv)
            asdf.add_waveforms(st, tag='raw_recording')
    return
Example #25
def obs_asdf_process(dir_observe, eventname, min_period, max_period):
    # the name and directory of the file holding the preprocessed data
    processdir = eventname + 'preprocessed'
    processdata = 'preprocessed_' + str(min_period) + 's_to_' + str(max_period) + 's.h5'
    print(processdata)
    if os.path.exists(processdir):
        os.system('rm -rf' + ' ' + processdir)
    os.makedirs(processdir)
    os.system('cp' + ' ' + dir_observe + ' ' + processdir + '/' + processdata)
    with pyasdf.ASDFDataSet(processdir + '/' + processdata) as ds_obs:
        station_list = ds_obs.waveforms.list()
        tag_name = 'preprocessed_' + str(min_period) + 's_to_' + str(max_period) + 's'
        for _i, stid in enumerate(station_list):
            stla, stlo, evz = ds_obs.waveforms[stid].coordinates.values()
            st_obs = ds_obs.waveforms[stid].raw_recording
            #st_syn_gre=ds_syn.waveforms[stid].displacement
            st_obs.detrend("linear")
            st_obs.detrend("demean")
            st_obs.taper(0.05, type="cosine")
            st_obs.filter("bandpass", freqmin=1.0 / max_period,
                          freqmax=1.0 / min_period, corners=3, zerophase=False)
            # second pass of the same detrend/taper/filter sequence
            st_obs.detrend("linear")
            st_obs.detrend("demean")
            st_obs.taper(0.05, type="cosine")
            st_obs.filter("bandpass", freqmin=1.0 / max_period,
                          freqmax=1.0 / min_period, corners=3, zerophase=False)

            ds_obs.add_waveforms(st_obs, tag=tag_name)
            del ds_obs.waveforms[stid].raw_recording
            del ds_obs.waveforms[stid].preprocess
Example #26
    def queryByBBoxInterval(self, outputFileName, bbox, timeinterval, chan='*Z', bbpadding=2,
                            event_id=None, verbose=False):
        """ Time interval is a tuple (starttime,endtime)
        """
        assert len(timeinterval) == 2, "timeinterval must be a tuple of ascending timestamps. len=" + str(
            len(timeinterval)) + " " + str(timeinterval)

        query_ds = pyasdf.ASDFDataSet(outputFileName)

        client = Client(self._client)
        ref_inv = client.get_stations(network=self._network,
                                      starttime=UTCDateTime(timeinterval[0]),
                                      endtime=UTCDateTime(timeinterval[1]),
                                      minlongitude=bbox[0] - bbpadding,
                                      maxlongitude=bbox[1] + bbpadding,
                                      minlatitude=bbox[2] - bbpadding,
                                      maxlatitude=bbox[3] + bbpadding,
                                      level='channel')

        if verbose:
            print(ref_inv)

        ref_st = Stream()

        # go through inventory and request timeseries data
        for net in ref_inv:
            for stn in net:
                stime = UTCDateTime(timeinterval[0])
                etime = UTCDateTime(timeinterval[1])
                step = 3600*24*10
                while stime + step < etime:
                    try:
                        ref_st = client.get_waveforms(network=net.code, station=stn.code,
                                                      channel=chan, location='*',
                                                      starttime=stime,
                                                      endtime=stime+step)
                        print(ref_st)
                        self.ref_stations.append(net.code + '.' + stn.code)
                        st_inv = ref_inv.select(station=stn.code, channel=chan)
                        
                        query_ds.add_stationxml(st_inv)
                        for tr in ref_st:
                            query_ds.add_waveforms(tr, "reference_station")
                    except FDSNException:
                        print('Data not available from Reference Station: ' + stn.code)
                    # end try
                    stime += step
                #wend
        # end for

        #tr.write(os.path.join(os.path.dirname(outputFileName), tr.id + ".MSEED"),
        #         format="MSEED") # Don't write miniseed
        if verbose:
            print("Wrote Reference Waveforms to ASDF file: " + outputFileName)
            print('\nWaveform data query completed.')

        metaOutputFileName = os.path.join(os.path.dirname(outputFileName),
                                          'meta.%s.xml'%(os.path.basename(outputFileName)))
        ref_inv.write(metaOutputFileName, format="STATIONXML")
        del query_ds
Example #27
def compare_c2_c3_waveforms(c2file,c3file,maxlag,c2_maxlag,dt):
    '''
    use data type from c3file to plot the waveform for c2 and c3
    note that the length of c3file is shorter than c2file
    c2file: HDF5 file for normal cross-correlation function
    c3file: HDF5 file for C3 function
    maxlag: maximum time lag for C3
    c2_maxlag: maximum time lag for C1
    dt: time increment
    '''

    # NOTE: these hard-coded values override the function arguments above
    c2file = '/Users/chengxin/Documents/Harvard/Kanto_basin/code/KANTO/CCF/2010_01_11.h5'
    c3file = '/Users/chengxin/Documents/Harvard/Kanto_basin/code/KANTO/CCF_C3/2010_01_11.h5'
    maxlag = 1000
    c2_maxlag = 1800
    dt = 0.05

    #-------time axis-------
    tt = np.arange(-maxlag/dt, maxlag/dt+1)*dt
    tt_c2 = np.arange(-c2_maxlag/dt, c2_maxlag/dt+1)*dt
    ind   = np.where(abs(tt_c2)<=-tt[0])[0]
    c3_waveform = np.zeros(tt.shape,dtype=np.float32)
    c2_waveform = np.zeros(tt_c2.shape,dtype=np.float32)

    #-------make station pairs--------
    ds_c2 = pyasdf.ASDFDataSet(c2file,mode='r')
    ds_c3 = pyasdf.ASDFDataSet(c3file,mode='r')

    #------loop through all c3 data_types-------
    data_type_c3 = ds_c3.auxiliary_data.list()
    for ii in range(len(data_type_c3)):
        path_c3 = ds_c3.auxiliary_data[data_type_c3[ii]].list()
        for jj in range(len(path_c3)):
            print(data_type_c3[ii],path_c3[jj])

            sta1 = data_type_c3[ii].split('s')[1]
            sta2 = path_c3[jj].split('s')[1]
            c3_waveform = ds_c3.auxiliary_data[data_type_c3[ii]][path_c3[jj]].data[:]
            c2_waveform = ds_c2.auxiliary_data[data_type_c3[ii]][path_c3[jj]].data[:]
            c1_waveform = c2_waveform[ind]
        
            plt.subplot(211)
            plt.plot(c3_waveform)
            plt.subplot(212)
            plt.plot(c1_waveform)
            plt.legend([sta1 + '_' + sta2], loc='upper right')
            plt.show()
Example #28
def plot_multi_freq_stack(sfile,freqmin,freqmax,nfreq,ccomp,tags=None):
    '''
    plot the stacked ccfs for sta1-sta2 in multiple frequency bands between freqmin and freqmax.
    this may be useful to show the dispersive property of the surface waves, which could
    be used together with plot_ZH_pmotion to identify surface wave components

    example: plot_multi_freq_stack('/Users/chengxin/Documents/Harvard/Kanto_basin/Mesonet_BW/STACK/E.AYHM/E.AYHM_E.BKKM.h5',0.08,6,15,'RR')
    '''
    #---basic parameters----
    maxlag = 100

    #------set path and type-------------
    ds = pyasdf.ASDFDataSet(sfile,mode='r')
    slist = ds.auxiliary_data.list()

    #-----check tags information------
    if not tags:
        tags = "Allstacked"
    
    if tags not in slist:
        raise ValueError('tags %s not in the file %s' % (tags,sfile))
    
    #--------read the data and parameters------------
    parameters = ds.auxiliary_data[tags][ccomp].parameters
    corr = ds.auxiliary_data[tags][ccomp].data[:]
    sampling_rate = int(1/parameters['dt'])
    npts = int(2*sampling_rate*parameters['lag'])
    indx = npts//2
    tt = np.arange(-maxlag*sampling_rate, maxlag*sampling_rate+1)/sampling_rate
    
    #------make frequency information-----
    freq = np.zeros(nfreq,dtype=np.float32)
    step = (np.log(freqmin)-np.log(freqmax))/(nfreq-1)
    
    for ii in range(nfreq):
        freq[ii]=np.exp(np.log(freqmin)-ii*step)

    indx0 = maxlag*sampling_rate
    #----loop through each freq-----
    for ii in range(1,nfreq-1):
        if ii==1:
            plt.figure(figsize=(9,6))

        f1 = freq[ii-1]
        f2 = freq[ii+1]
        ncorr = bandpass(corr,f1,f2,sampling_rate,corners=4,zerophase=True)
        ncorr = ncorr/max(ncorr)

        #------plot the signals-------
        plt.plot(tt,ncorr[indx-indx0:indx+indx0+1]+ii,'k-',linewidth=0.6)
        ttext = '{0:4.2f}-{1:4.2f} Hz'.format(f1,f2)
        plt.text(maxlag*0.9,ii+0.3,ttext,fontsize=6)
        if ii==1:
            plt.title('%s at %s component' % (sfile.split('/')[-1],ccomp))
            plt.xlabel('time [s]')
            plt.ylabel('waveform #')
    plt.grid(True)
    plt.show()
    del ds
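# Usage sketch, mirroring the docstring example above:
plot_multi_freq_stack('/Users/chengxin/Documents/Harvard/Kanto_basin/Mesonet_BW/STACK/E.AYHM/E.AYHM_E.BKKM.h5',
                      0.08, 6, 15, 'RR')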
Example #29
 def _get_asdf_dataset(self):
     try:
         ds = pyasdf.ASDFDataSet(self.asdf_path, mode="a")
         self._log.debug("Opened ASDF dataset '{}'.".format(self.asdf_path))
     except Exception as e:
         raise Exception("Failed to open ASDF file at '{}': {}.".format(
             self.asdf_path, e))
     return ds
Example #30
def process(input_asdf, output_folder, start_date, end_date, length):
    """
    INPUT_ASDF: Path to input ASDF file\n
    OUTPUT_FOLDER: Output folder \n

    Example usage:
    mpirun -np 2 python asdf2mseed.py ./test.asdf /tmp/output --start-date 2013-01-01T00:00:00 --end-date 2016-01-01T00:00:00

    Note: 
    """

    try:
        start_date = UTCDateTime(start_date).timestamp if start_date else None
        end_date = UTCDateTime(end_date).timestamp if end_date else None
        length = int(length)
    except Exception:
        raise ValueError('Invalid input')
    # end try

    comm = MPI.COMM_WORLD
    nproc = comm.Get_size()
    rank = comm.Get_rank()
    proc_stations = defaultdict(list)
    ds = pyasdf.ASDFDataSet(input_asdf, mode='r')

    if (rank == 0):
        # split work over stations in ds1 for the time being

        meta = ds.get_all_coordinates()
        stations = list(meta.keys())
        count = 0
        for iproc in np.arange(nproc):
            for istation in np.arange(len(stations) // nproc):
                proc_stations[iproc].append(stations[count])
                count += 1
        # end for
        for iproc in np.arange(np.mod(len(stations), nproc)):
            proc_stations[iproc].append(stations[count])
            count += 1
        # end for

        # output station meta-data
        fn = os.path.join(output_folder, 'stations.txt')
        f = open(fn, 'w+')
        f.write('#Station\t\tLongitude\t\tLatitude\n')
        for sn in stations:
            f.write('%s\t\t%f\t\t%f\n' %
                    (sn, meta[sn]['longitude'], meta[sn]['latitude']))
        # end for
        f.close()
    # end if

    # broadcast workload to all procs
    proc_stations = comm.bcast(proc_stations, root=0)

    print(proc_stations)
    dump_traces(ds, proc_stations[rank], start_date, end_date, length,
                output_folder)