示例#1
0
    def test_ReadStations(self):
        """Read the test station-coordinates file and check its contents.

        The file ``coord_stations_test`` is looked up in the ``test_data``
        directory under the path held by the ``WAVELOC_PATH`` environment
        variable. The test passes when station ``RVL`` is present and the
        ``x`` entry of station ``SNE`` is approximately 366.958.
        """
        waveloc_root = os.getenv('WAVELOC_PATH')
        coord_file = os.path.join(waveloc_root, 'test_data',
                                  'coord_stations_test')
        sta_table = read_stations_file(coord_file)

        self.assertTrue('RVL' in sta_table)
        self.assertAlmostEqual(sta_table['SNE']['x'], 366.958)
示例#2
0
def do_comp_mag(opdict):
    """
    Compute a magnitude for every located event and print a b-value.

    The locations are read from ``<base_path>/out/<outdir>/loc/locations.dat``
    and that same file is then rewritten: the header options first, then one
    line per location carrying an extra ``ml= <mean> pm <std>`` field.

    For each station that has data, every component of ``comp_list`` is cut
    in a window running from 0.5 before to 5.5 after the origin time. When the
    window passes the signal-to-noise test, its peak-to-peak amplitude, the
    time between the two extrema and the matching poles-and-zeros entry are
    handed to ``estimateMagnitude``. The event magnitude is the mean over
    stations. Finally ``bvalue`` is run on all event magnitudes and the
    b-value is printed.

    :param opdict: Dictionary of parameters and options. Keys read here are
        ``base_path``, ``dataless``, ``outdir``, ``datadir``, ``dataglob``,
        ``stations``, ``comp_list``, ``verbose`` and ``snr_tr_limit`` (the
        last one after the header of the locations file has been merged in
        by ``read_header_from_file``). When ``verbose`` is true, diagnostic
        figures are shown.
    """
    base_path = opdict['base_path']

    # dataless
    dataless_glob = sorted(
        glob.glob(os.path.join(base_path, 'lib', opdict['dataless'])))

    # data
    data_dir = os.path.join(base_path, 'data', opdict['datadir'])
    data_glob = opdict['dataglob']

    # location file
    locdir = os.path.join(base_path, 'out', opdict['outdir'], 'loc')
    locfile = os.path.join(locdir, 'locations.dat')
    locs = read_locs_from_file(locfile)
    opdict = read_header_from_file(locfile, opdict)
    # builtin float: np.float was a plain alias for it and no longer exists
    # in recent NumPy releases
    snr_wf = float(opdict['snr_tr_limit'])

    # Stations
    stations_filename = os.path.join(base_path, 'lib', opdict['stations'])
    stations = read_stations_file(stations_filename)
    sta_names = sorted(stations)

    paz = read_paz(dataless_glob)

    vals, tdeb = {}, {}
    for sta in sta_names:
        vals[sta] = {}
        tdeb[sta] = {}

    cha_list = opdict['comp_list']
    for cha in cha_list:
        vals, tdeb, dt = fill_values(vals, tdeb, data_glob, data_dir, cha)

    mags = []
    # The locations file is overwritten in place; the context manager makes
    # sure it is closed even if a location fails half-way through.
    with open(locfile, 'w') as new_file:
        write_header_options(new_file, opdict)

        for loc in locs:
            stack_time = loc['o_time']
            loc_x = loc['x_mean']
            loc_y = loc['y_mean']
            # the sign of z is flipped before it is compared with the
            # station elevation
            loc_z = -loc['z_mean']

            ml = []
            for sta in sta_names:
                if not vals[sta]:
                    continue

                paz_list, p2p_amp, tspan = [], [], []
                h_dist = np.sqrt((loc_x - stations[sta]['x'])**2 +
                                 (loc_y - stations[sta]['y'])**2 +
                                 (loc_z - stations[sta]['elev'])**2)
                for cha in cha_list:
                    # NOTE(review): round() is applied to the time offset
                    # before the division by dt, so the window edges snap to
                    # whole time units rather than to the nearest sample.
                    # Kept as is to leave results unchanged - confirm intent.
                    istart = int(
                        round(stack_time - 0.5 - tdeb[sta][cha]) * 1. / dt)
                    iend = int(
                        round(stack_time + 5.5 - tdeb[sta][cha]) * 1. / dt)

                    x = vals[sta][cha][istart:iend + 1]

                    if x.any() and np.max(x) / np.mean(np.abs(x)) > snr_wf:
                        max_amp = np.max(x)
                        i_max_amp = np.argmax(x)
                        min_amp = np.abs(np.min(x))
                        i_min_amp = np.argmin(x)

                        paz_list.append(paz[sta][cha])
                        tspan.append(np.abs(i_max_amp - i_min_amp) * dt)
                        p2p_amp.append(max_amp + min_amp)

                        if opdict['verbose']:
                            fig = plt.figure()
                            fig.set_facecolor('white')
                            plt.plot(x)
                            plt.plot(i_min_amp, x[i_min_amp], 'ro')
                            plt.plot(i_max_amp, x[i_max_amp], 'ro')
                            plt.title('%s,%s' % (sta, cha))
                            plt.show()

                if paz_list:
                    mag = estimateMagnitude(paz_list, p2p_amp, tspan, h_dist)
                    ml.append(mag)

            # The line is written for every location, including those for
            # which no station contributed a magnitude (ml is then empty).
            new_file.write(
                "Max = %.2f, %s - %.2f s + %.2f s, x= %.4f pm %.4f km, y= %.4f pm %.4f km, z= %.4f pm %.4f km, ml= %.2f pm %.2f\n"
                % (loc['max_trig'], loc['o_time'].isoformat(),
                   loc['o_err_left'], loc['o_err_right'], loc['x_mean'],
                   loc['x_sigma'], loc['y_mean'], loc['y_sigma'],
                   loc['z_mean'], loc['z_sigma'], np.mean(ml), np.std(ml)))

            # only events with at least one station magnitude enter the
            # b-value statistics
            if ml:
                mags.append(np.mean(ml))

    r = np.arange(-3, 3, 0.1)
    p, logN, i1, i2 = bvalue(mags, r)
    # single-argument form prints the same text under Python 2 and 3
    print("%s %s" % ("b-value:", -p[0]))

    if opdict['verbose']:
        fig = plt.figure(figsize=(10, 5))
        fig.set_facecolor('white')
        ax1 = fig.add_subplot(121)
        ax1.hist(mags, 25)
        ax1.set_xlabel('Magnitude')

        ax2 = fig.add_subplot(122, title='Gutenberg Richter law')
        ax2.plot(r, logN)
        ax2.plot(r[i1:i2], np.polyval(p, r[i1:i2]), 'r')
        ax2.set_xlabel('Magnitude')
        ax2.set_ylabel('log N')
        plt.show()
示例#3
0
def do_locations_prob_setup_and_run(opdict):
    """
    Compute probability-density locations for every event in locations.dat.

    If ``<base_path>/out/<outdir>/loc/locations.dat`` does not exist,
    ``do_locations_trigger_setup_and_run`` is run first to create it. Then,
    for each location, the gradient data around the origin time are migrated
    again, the relevant part of the resulting stack grid is extracted, and
    its expected coordinates and covariance matrix are computed.

    Two files are written in the ``loc`` directory:

    * ``locations_prob.dat`` - one text line per event with the expected
      time and coordinates and their uncertainties;
    * ``locations_prob.hdf5`` - one group per event (named after the
      expected time) holding the grid axes and the marginal distributions.

    :param opdict: Dictionary of parameters and options. Keys read here are
        ``base_path``, ``outdir``, ``datadir``, ``gradglob``, ``stations``,
        ``search_grid`` and ``probloc_spaceonly``. When
        ``probloc_spaceonly`` is true only the time sample at the origin
        time is used (3D); otherwise the window spans three times the
        origin-time errors on each side (4D).
    """
    # get / set info
    base_path = opdict['base_path']
    space_only = opdict['probloc_spaceonly']

    locdir = os.path.join(base_path, 'out', opdict['outdir'], 'loc')
    locfile = os.path.join(locdir, 'locations.dat')
    locfile_prob = os.path.join(locdir, 'locations_prob.dat')
    locfile_hdf5 = os.path.join(locdir, 'locations_prob.hdf5')

    # if locfile does not exist then make it by running trigger location
    if not os.path.exists(locfile):
        logging.info(
            'No location found at %s.  Running trigger location first...' %
            locfile)
        do_locations_trigger_setup_and_run(opdict)

    # data files (only the gradient files are migrated here)
    data_dir = os.path.join(base_path, 'data', opdict['datadir'])
    grad_files = sorted(glob.glob(os.path.join(data_dir, opdict['gradglob'])))

    # stations: the result is not used below; the call is kept because its
    # side effects are not visible from this function
    stations_filename = os.path.join(base_path, 'lib', opdict['stations'])
    read_stations_file(stations_filename)

    # grids
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])

    # read time grid information
    time_grids = get_interpolated_time_grids(opdict)

    # read locations
    locs = read_locs_from_file(locfile)

    # Both output files are opened only once all inputs have been read, and
    # are closed by the context manager even if an event fails.
    with open(locfile_prob, 'w') as f_prob, \
            h5py.File(locfile_hdf5, 'w') as f_marginals:

        # iterate over locations
        for loc in locs:

            # time window of interest around the origin time
            o_time = loc['o_time']
            if space_only:
                start_time = o_time
                end_time = o_time
            else:
                start_time = o_time - 3 * loc['o_err_left']
                end_time = o_time + 3 * loc['o_err_right']

            # make a buffer for migration
            start_time_migration = start_time - 10.0
            end_time_migration = end_time + 10.0

            # re-read grid info to ensure clean copy
            grid_info = read_hdr_file(search_grid_filename)

            # read data
            grad_dict, delta = read_data_compatible_with_time_dict(
                grad_files, time_grids, start_time_migration,
                end_time_migration)

            # do migration (all metadata on grid is added to grid_info)
            do_migration_loop_continuous(opdict, grad_dict, delta,
                                         start_time_migration, grid_info,
                                         time_grids, keep_grid=True)

            # integrate to get the marginal probability density distributions

            # get required info
            grid_starttime = grid_info['start_time']
            nx, ny, nz, nt = grid_info['grid_shape']
            dx, dy, dz, dt = grid_info['grid_spacing']
            x_orig, y_orig, z_orig = grid_info['grid_orig']

            # we are only interested in the time around the origin time of
            # the event (builtin int replaces the removed np.int alias)
            it_left = int(np.round((start_time - grid_starttime) / dt))
            it_right = int(np.round((end_time - grid_starttime) / dt))
            it_true = int(np.round((o_time - grid_starttime) / dt))
            nt = (it_right - it_left) + 1

            # set up integration axes (wrt reference)
            x = np.arange(nx) * dx
            y = np.arange(ny) * dy
            z = np.arange(nz) * dz
            if not space_only:
                t = np.arange(nt) * dt

            # open the grid file and copy out the portion of interest; the
            # file is closed as soon as the copy is done
            with h5py.File(grid_info['dat_file'], 'r') as f:
                stack_grid = f['stack_grid']
                if space_only:
                    stack_3D = np.empty((nx, ny, nz))
                    stack_3D[:] = stack_grid[:, it_true].reshape(nx, ny, nz)
                else:
                    stack_4D = np.empty((nx, ny, nz, nt))
                    stack_4D[:] = stack_grid[:, it_left:it_right + 1].reshape(
                        nx, ny, nz, nt)

            # Get expected values (normalizes grid internally)
            if space_only:
                exp_x, exp_y, exp_z, cov_matrix, prob_dict = \
                    compute_expected_coordinates3D(stack_3D, x, y, z,
                                                   return_2Dgrids=True)
            else:
                exp_x, exp_y, exp_z, exp_t, cov_matrix, prob_dict = \
                    compute_expected_coordinates4D(stack_4D, x, y, z, t,
                                                   return_2Dgrids=True)

            # put reference location back
            exp_x = exp_x + x_orig
            exp_y = exp_y + y_orig
            exp_z = exp_z + z_orig
            if space_only:
                exp_t = o_time
            else:
                exp_t = start_time + exp_t

            # extract uncertainties from covariance matrix
            if space_only:
                sig_x, sig_y, sig_z = np.sqrt(np.diagonal(cov_matrix))
                # no time axis was integrated: fall back on the mean of the
                # two origin-time errors of the input location
                sig_t = (loc['o_err_left'] + loc['o_err_right']) / 2.
            else:
                sig_x, sig_y, sig_z, sig_t = np.sqrt(np.diagonal(cov_matrix))

            # save the marginals to a hdf5 file in loc subdirectory
            # (f_marginals); each event becomes a group in this one file
            grp = f_marginals.create_group(exp_t.isoformat())
            grp.create_dataset('x', data=x + x_orig)
            grp.create_dataset('y', data=y + y_orig)
            grp.create_dataset('z', data=z + z_orig)
            grp.create_dataset('prob_x', data=prob_dict['prob_x0'])
            grp.create_dataset('prob_y', data=prob_dict['prob_x1'])
            grp.create_dataset('prob_z', data=prob_dict['prob_x2'])
            grp.create_dataset('prob_xy', data=prob_dict['prob_x0_x1'])
            grp.create_dataset('prob_xz', data=prob_dict['prob_x0_x2'])
            grp.create_dataset('prob_yz', data=prob_dict['prob_x1_x2'])
            if not space_only:
                # time axis is stored relative to the input origin time
                grp.create_dataset('t', data=t - (o_time - start_time))
                grp.create_dataset('prob_t', data=prob_dict['prob_x3'])
                grp.create_dataset('prob_xt', data=prob_dict['prob_x0_x3'])
                grp.create_dataset('prob_yt', data=prob_dict['prob_x1_x3'])
                grp.create_dataset('prob_zt', data=prob_dict['prob_x2_x3'])

            # write the expected values to a plain text locations file
            f_prob.write(
                "PROB DENSITY : T = %s s pm %.2f s, x= %.4f pm %.4f km, "
                "y= %.4f pm %.4f km, z= %.4f pm %.4f km\n" %
                (exp_t.isoformat(), sig_t, exp_x, sig_x, exp_y, sig_y, exp_z,
                 sig_z))
示例#4
0
def do_clustering_setup_and_run(opdict):
    """
    Cluster events from their cross-correlation values (outer routine).

    The correlation values are read from the binary file
    ``<base_path>/out/<outdir>/loc/<xcorr_corr>``. ``compute_nbsta`` and
    ``do_clustering`` are then run for the correlation threshold ``clus``;
    the resulting clusters are printed and written to the binary file
    ``cluster-<clus>-<nbsta>`` in the same ``loc`` directory. When
    ``verbose`` is true the locations and stations are also read and
    ``plot_traces`` is called.

    :param opdict: Dictionary of parameters and options. Keys read here are
        ``base_path``, ``verbose``, ``stations``, ``outdir``, ``datadir``,
        ``dataglob``, ``xcorr_corr``, ``xcorr_delay``, ``nbsta`` and ``clus``.
    :raises Error: if ``nbsta`` is larger than the number of keys in the
        correlation dictionary (``Error`` is expected to be defined or
        imported elsewhere in this module).
    """
    base_path = opdict['base_path']
    verbose = opdict['verbose']

    # stations
    stations_filename = os.path.join(base_path, 'lib', opdict['stations'])

    # data
    data_dir = os.path.join(base_path, 'data', opdict['datadir'])
    data_files = sorted(glob.glob(os.path.join(data_dir, opdict['dataglob'])))

    # location file
    locdir = os.path.join(base_path, 'out', opdict['outdir'], 'loc')
    loc_filename = os.path.join(locdir, 'locations.dat')

    # file containing correlation values
    coeff_file = os.path.join(locdir, opdict['xcorr_corr'])
    # Read correlation values
    coeff = BinaryFile(coeff_file).read_binary_file()

    # file containing time delays
    delay_file = os.path.join(locdir, opdict['xcorr_delay'])

    # INPUT PARAMETERS
    nbmin = int(opdict['nbsta'])
    if nbmin > len(coeff):
        raise Error('the minimum number of stations cannot be > to the number of stations !!')
    # size of the first entry of coeff (presumably the number of events);
    # list() keeps the indexing valid when values() returns a view
    event = len(list(coeff.values())[0])
    tplot = float(opdict['clus'])  # threshold for which we save and plot
    cluster_file = "%s/cluster-%s-%s" % (locdir, str(tplot), str(nbmin))

    corr = [opdict['clus']]
    for threshold in corr:
        threshold = float(threshold)
        nbsta = compute_nbsta(event, coeff, threshold)

        CLUSTER = do_clustering(event, nbsta, nbmin)

        if threshold != tplot:
            continue

        # Single-argument print calls produce the same text under
        # Python 2 and Python 3.
        print("----------------------------------------------")
        print("%s %s %s %s" % ("THRESHOLD : ", threshold,
                               " # STATIONS : ", nbmin))
        print("%s %s" % ("# CLUSTERS : ", len(CLUSTER)))
        print(CLUSTER)

        c = BinaryFile(cluster_file)
        c.write_binary_file(CLUSTER)
        print("Written in %s" % cluster_file)

        if verbose:  # PLOT
            # Read location file
            locs = read_locs_from_file(loc_filename)
            # Read station file
            stations = read_stations_file(stations_filename)

            # Look at the waveforms
            plot_traces(CLUSTER, delay_file, coeff, locs, stations, data_dir,
                        data_files, threshold)
示例#5
0
def generateSyntheticDirac(opdict, time_grids=None):
    """
    Create a synthetic dataset, migrate it and write the results to disk.

    One synthetic trace is built per station: zeros (or uniform random noise
    when ``syn_addnoise`` is set) with a linearly decaying ramp of height
    ``syn_amplitude`` and width ``syn_kwidth`` starting at the origin time
    plus the travel time from the chosen grid point (``syn_ix``, ``syn_iy``,
    ``syn_iz``) to that station. The traces are migrated with
    ``migrate_4D_stack`` into the HDF5 file
    ``<base_path>/out/<outdir>/grid/<syn_filename>``, the maxima are
    extracted with ``extract_max_values`` into
    ``<base_path>/out/<outdir>/stack/stack_all_<syn_filename>``, and the
    returned dictionary is also written as text to
    ``<base_path>/out/<outdir>/grid/<syn_filename>.info``.

    :param opdict: Dictionary of parameters and options. Keys read here are
        ``base_path``, ``outdir``, ``stations``, ``search_grid``,
        ``syn_amplitude``, ``syn_datalength``, ``syn_samplefreq``,
        ``syn_filename``, ``syn_kwidth``, ``syn_otime``, ``syn_ix``,
        ``syn_iy``, ``syn_iz``, ``syn_addnoise``, ``syn_snr`` (only with
        noise) and, optionally, ``sta_list`` (comma-separated station names;
        all stations of the stations file are used when absent).
    :param time_grids: Travel-time grids indexed by station name. When
        ``None`` they are loaded with ``get_interpolated_time_grids``.
    :returns: Dictionary with keys ``dat_file``, ``stack_file``,
        ``grid_shape``, ``grid_spacing``, ``grid_orig``, ``true_indexes``
        and ``start_time``.
    """
    from NllGridLib import read_stations_file, read_hdr_file
    from migration import migrate_4D_stack, extract_max_values
    from hdf5_grids import get_interpolated_time_grids

    # define length and sampling frequency of synthetic data
    s_amplitude = opdict['syn_amplitude']
    s_data_length = opdict['syn_datalength']
    s_sample_freq = opdict['syn_samplefreq']
    s_filename = opdict['syn_filename']

    s_npts = int(s_data_length * s_sample_freq)
    # 1.0 forces true division: with an integer sampling frequency the
    # Python 2 expression 1 / freq would truncate to 0
    s_delta = 1.0 / s_sample_freq
    s_kwidth = opdict['syn_kwidth']
    s_nkwidth = int(round(s_kwidth * s_sample_freq))

    # define origin time
    s_t0 = opdict['syn_otime']

    base_path = opdict['base_path']
    outdir = opdict['outdir']
    test_grid_file = os.path.join(base_path, 'out', outdir, 'grid',
                                  s_filename)
    test_stack_file = os.path.join(base_path, 'out', outdir, 'stack',
                                   'stack_all_' + s_filename)
    test_info_file = os.path.join(base_path, 'out', outdir, 'grid',
                                  '%s.info' % s_filename)

    # get filenames for search grid and stations
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])
    stations_filename = os.path.join(base_path, 'lib', opdict['stations'])
    stations = read_stations_file(stations_filename)

    if 'sta_list' in opdict:
        sta_list = opdict['sta_list'].split(',')
    else:
        sta_list = stations.keys()

    # get parameters for noise etc
    syn_addnoise = opdict['syn_addnoise']

    #################################
    # start setting up synthetic data
    #################################

    grid_info = read_hdr_file(search_grid_filename)

    if time_grids is None:
        time_grids = get_interpolated_time_grids(opdict)

    #################################
    # create synthetic data
    #################################

    # choose hypocenter
    nx = grid_info['nx']
    ny = grid_info['ny']
    nz = grid_info['nz']

    dx = grid_info['dx']
    dy = grid_info['dy']
    dz = grid_info['dz']

    x_orig = grid_info['x_orig']
    y_orig = grid_info['y_orig']
    z_orig = grid_info['z_orig']

    ix = opdict['syn_ix']
    iy = opdict['syn_iy']
    iz = opdict['syn_iz']
    it = int(round(s_t0 / s_delta))

    # retrieve travel times for chosen hypocenter and station list;
    # ib is the flat index of (ix, iy, iz) with z varying fastest
    ib = ix * ny * nz + iy * nz + iz
    n_buf = nx * ny * nz
    logging.debug('ib for true hypocenter = %d' % ib)
    ttimes = {}
    for sta in sta_list:
        if sta in time_grids:
            ttimes[sta] = time_grids[sta].grid_data[ib]
        else:
            logging.info(
                'Missing travel-time information for station %s. Ignoring station...'
                % sta)
    logging.debug('Travel-times for true hypocenter = %s' % ttimes)

    # construct data with these travel times
    data = {}
    for key, delay in ttimes.items():
        if syn_addnoise:
            s_snr = opdict['syn_snr']
            s = np.random.rand(s_npts) * s_amplitude / s_snr
        else:
            s = np.zeros(s_npts)
        atime = s_t0 + delay
        i_atime = int(atime / s_delta)
        if i_atime + s_nkwidth > len(s):
            # NOTE(review): execution continues after this message; the
            # assignment below then receives a ramp longer than the samples
            # left in the trace - confirm whether this should abort instead.
            logging.error(
                'syn_datalength is too small compared with geographical size of network '
            )
        # linearly decaying ramp starting at the arrival sample
        s[i_atime:i_atime + s_nkwidth] = (
            s_amplitude - np.arange(s_nkwidth) *
            (s_amplitude / float(s_nkwidth)))
        data[key] = s

    # DO MIGRATION

    logging.info('Doing migration to %s' % test_grid_file)
    # Context managers close both HDF5 files even if a step fails.
    with h5py.File(test_grid_file, 'w') as f:
        stack_grid = f.create_dataset('stack_grid', (n_buf, s_npts),
                                      'f',
                                      chunks=(1, s_npts))
        stack_shift_time = migrate_4D_stack(data, s_delta, time_grids,
                                            stack_grid)
        n_buf, nt = stack_grid.shape

        # add useful information to dataset
        for key, value in grid_info.items():
            stack_grid.attrs[key] = value
        stack_grid.attrs['dt'] = s_delta
        stack_grid.attrs['start_time'] = -stack_shift_time

        # extract max-stack
        logging.info('Extracting max_val etc. to %s' % test_stack_file)
        with h5py.File(test_stack_file, 'w') as f_stack:
            # extract maxima
            extract_max_values(stack_grid, grid_info, f_stack)
            for name in f_stack:
                dset = f_stack[name]
                logging.debug('After extract_max_values : %s %f %f' %
                              (name, np.max(dset), np.sum(dset)))
                dset.attrs['start_time'] = -stack_shift_time
                dset.attrs['dt'] = s_delta

    logging.info('Saved 4D grid to file %s' % test_grid_file)

    # time index of the origin time within the shifted stack
    shifted_it = it + int(round(stack_shift_time / s_delta))

    # SETUP information to pass back
    test_info = {}
    test_info['dat_file'] = test_grid_file
    test_info['stack_file'] = test_stack_file
    test_info['grid_shape'] = nx, ny, nz, nt
    test_info['grid_spacing'] = dx, dy, dz, s_delta
    test_info['grid_orig'] = x_orig, y_orig, z_orig
    test_info['true_indexes'] = (ix, iy, iz, shifted_it)
    test_info['start_time'] = -stack_shift_time

    logging.debug(test_info)
    # the info file is closed (and so flushed) before returning
    with open(test_info_file, 'w') as f_info:
        f_info.write(str(test_info))

    return test_info
示例#6
0
def do_comp_mag(opdict):
    """
    Compute a magnitude for every located event and print a b-value.

    The locations are read from ``<base_path>/out/<outdir>/loc/locations.dat``
    and that same file is then rewritten: the header options first, then one
    line per location carrying an extra ``ml= <mean> pm <std>`` field.

    For each station that has data, every component of ``comp_list`` is cut
    in a window running from 0.5 before to 5.5 after the origin time. When the
    window passes the signal-to-noise test, its peak-to-peak amplitude, the
    time between the two extrema and the matching poles-and-zeros entry are
    handed to ``estimateMagnitude``. The event magnitude is the mean over
    stations. Finally ``bvalue`` is run on all event magnitudes and the
    b-value is printed.

    :param opdict: Dictionary of parameters and options. Keys read here are
        ``base_path``, ``dataless``, ``outdir``, ``datadir``, ``dataglob``,
        ``stations``, ``comp_list``, ``verbose`` and ``snr_tr_limit`` (the
        last one after the header of the locations file has been merged in
        by ``read_header_from_file``). When ``verbose`` is true, diagnostic
        figures are shown.
    """
    base_path = opdict['base_path']

    # dataless
    dataless_glob = sorted(
        glob.glob(os.path.join(base_path, 'lib', opdict['dataless'])))

    # data
    data_dir = os.path.join(base_path, 'data', opdict['datadir'])
    data_glob = opdict['dataglob']

    # location file
    locdir = os.path.join(base_path, 'out', opdict['outdir'], 'loc')
    locfile = os.path.join(locdir, 'locations.dat')
    locs = read_locs_from_file(locfile)
    opdict = read_header_from_file(locfile, opdict)
    # builtin float: np.float was a plain alias for it and no longer exists
    # in recent NumPy releases
    snr_wf = float(opdict['snr_tr_limit'])

    # Stations
    stations_filename = os.path.join(base_path, 'lib', opdict['stations'])
    stations = read_stations_file(stations_filename)
    sta_names = sorted(stations)

    paz = read_paz(dataless_glob)

    vals, tdeb = {}, {}
    for sta in sta_names:
        vals[sta] = {}
        tdeb[sta] = {}

    cha_list = opdict['comp_list']
    for cha in cha_list:
        vals, tdeb, dt = fill_values(vals, tdeb, data_glob, data_dir, cha)

    mags = []
    # The locations file is overwritten in place; the context manager makes
    # sure it is closed even if a location fails half-way through.
    with open(locfile, 'w') as new_file:
        write_header_options(new_file, opdict)

        for loc in locs:
            stack_time = loc['o_time']
            loc_x = loc['x_mean']
            loc_y = loc['y_mean']
            # the sign of z is flipped before it is compared with the
            # station elevation
            loc_z = -loc['z_mean']

            ml = []
            for sta in sta_names:
                if not vals[sta]:
                    continue

                paz_list, p2p_amp, tspan = [], [], []
                h_dist = np.sqrt((loc_x - stations[sta]['x'])**2 +
                                 (loc_y - stations[sta]['y'])**2 +
                                 (loc_z - stations[sta]['elev'])**2)
                for cha in cha_list:
                    # NOTE(review): round() is applied to the time offset
                    # before the division by dt, so the window edges snap to
                    # whole time units rather than to the nearest sample.
                    # Kept as is to leave results unchanged - confirm intent.
                    istart = int(
                        round(stack_time - 0.5 - tdeb[sta][cha]) * 1. / dt)
                    iend = int(
                        round(stack_time + 5.5 - tdeb[sta][cha]) * 1. / dt)

                    x = vals[sta][cha][istart:iend + 1]

                    if x.any() and np.max(x) / np.mean(np.abs(x)) > snr_wf:
                        max_amp = np.max(x)
                        i_max_amp = np.argmax(x)
                        min_amp = np.abs(np.min(x))
                        i_min_amp = np.argmin(x)

                        paz_list.append(paz[sta][cha])
                        tspan.append(np.abs(i_max_amp - i_min_amp) * dt)
                        p2p_amp.append(max_amp + min_amp)

                        if opdict['verbose']:
                            fig = plt.figure()
                            fig.set_facecolor('white')
                            plt.plot(x)
                            plt.plot(i_min_amp, x[i_min_amp], 'ro')
                            plt.plot(i_max_amp, x[i_max_amp], 'ro')
                            plt.title('%s,%s' % (sta, cha))
                            plt.show()

                if paz_list:
                    mag = estimateMagnitude(paz_list, p2p_amp, tspan, h_dist)
                    ml.append(mag)

            # The line is written for every location, including those for
            # which no station contributed a magnitude (ml is then empty).
            new_file.write(
                "Max = %.2f, %s - %.2f s + %.2f s, x= %.4f pm %.4f km, y= %.4f pm %.4f km, z= %.4f pm %.4f km, ml= %.2f pm %.2f\n"
                % (loc['max_trig'], loc['o_time'].isoformat(),
                   loc['o_err_left'], loc['o_err_right'], loc['x_mean'],
                   loc['x_sigma'], loc['y_mean'], loc['y_sigma'],
                   loc['z_mean'], loc['z_sigma'], np.mean(ml), np.std(ml)))

            # only events with at least one station magnitude enter the
            # b-value statistics
            if ml:
                mags.append(np.mean(ml))

    r = np.arange(-3, 3, 0.1)
    p, logN, i1, i2 = bvalue(mags, r)
    # single-argument form prints the same text under Python 2 and 3
    print("%s %s" % ("b-value:", -p[0]))

    if opdict['verbose']:
        fig = plt.figure(figsize=(10, 5))
        fig.set_facecolor('white')
        ax1 = fig.add_subplot(121)
        ax1.hist(mags, 25)
        ax1.set_xlabel('Magnitude')

        ax2 = fig.add_subplot(122, title='Gutenberg Richter law')
        ax2.plot(r, logN)
        ax2.plot(r[i1:i2], np.polyval(p, r[i1:i2]), 'r')
        ax2.set_xlabel('Magnitude')
        ax2.set_ylabel('log N')
        plt.show()
示例#7
0
def do_plotting_setup_and_run(opdict, plot_wfm=True, plot_grid=True):
    """
    Plot the stack grid and/or the waveforms for every located event.

    Locations are read from ``<base_path>/out/<outdir>/loc/locations.dat``
    and the smoothed stack maximum from
    ``<base_path>/out/<outdir>/stack/combined_stack_all.hdf5``. Figures are
    produced by ``plotLocationGrid`` and ``plotLocationWaveforms``, which
    receive the directory ``<base_path>/out/<outdir>/fig``.

    The files that are migrated / shown alongside the raw data are the
    kurtosis files by default, the gradient files when ``kderiv`` is set,
    and the gaussian files when both ``kderiv`` and ``gauss`` are set.

    :param opdict: Dictionary of parameters and options. Keys read here are
        ``base_path``, ``outdir``, ``datadir``, ``dataglob``, ``kurtglob``,
        ``kderiv``, ``gradglob``, ``gauss``, ``gaussglob``, ``stations``,
        ``search_grid``, ``plot_tbefore``, ``plot_tafter`` and
        ``plot_otime_window``.
    :param plot_wfm: If true, plot the waveforms for each location.
    :param plot_grid: If true, re-migrate and plot the grid for each location.
    """
    # get / set info
    base_path = opdict['base_path']

    locfile = os.path.join(base_path, 'out', opdict['outdir'], 'loc',
                           'locations.dat')
    stackfile = os.path.join(base_path, 'out', opdict['outdir'], 'stack',
                             'combined_stack_all.hdf5')

    data_dir = os.path.join(base_path, 'data', opdict['datadir'])

    data_glob = opdict['dataglob']
    data_files = glob.glob(os.path.join(data_dir, data_glob))
    data_files.sort()

    kurt_glob = opdict['kurtglob']
    kurt_files = glob.glob(os.path.join(data_dir, kurt_glob))
    kurt_files.sort()
    mig_files = kurt_files

    if opdict['kderiv']:
        grad_glob = opdict['gradglob']
        grad_files = glob.glob(os.path.join(data_dir, grad_glob))
        grad_files.sort()
        mig_files = grad_files

        if opdict['gauss']:
            gauss_glob = opdict['gaussglob']
            gauss_files = glob.glob(os.path.join(data_dir, gauss_glob))
            gauss_files.sort()
            mig_files = gauss_files

    figdir = os.path.join(base_path, 'out', opdict['outdir'], 'fig')

    # stations: the result is not used below; the call is kept because its
    # side effects are not visible from this function
    stations_filename = os.path.join(base_path, 'lib', opdict['stations'])
    read_stations_file(stations_filename)

    # grids
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])
    # read time grid information
    time_grids = get_interpolated_time_grids(opdict)

    # read locations
    locs = read_locs_from_file(locfile)

    # open stack file; the context manager closes it even if plotting fails
    with h5py.File(stackfile, 'r') as f_stack:
        max_val = f_stack['max_val_smooth']
        stack_start_time = UTCDateTime(max_val.attrs['start_time'])

        for loc in locs:
            # time window to plot
            o_time = loc['o_time']
            start_time = o_time - opdict['plot_tbefore']
            end_time = o_time + opdict['plot_tafter']

            # re-read grid info to ensure clean copy
            grid_info = read_hdr_file(search_grid_filename)

            x = loc['x_mean']
            y = loc['y_mean']
            z = loc['z_mean']
            # get the corresponding travel-times for time-shifting
            ttimes = {}
            for sta in time_grids.keys():
                ttimes[sta] = time_grids[sta].value_at_point(x, y, z)

            # widen the read window by the largest travel time on both sides
            tshift_migration = max(ttimes.values())

            start_time_migration = start_time - tshift_migration
            end_time_migration = end_time + tshift_migration

            if plot_grid:
                logging.info('Plotting grid for location %s' %
                             o_time.isoformat())
                # TODO implement a rough estimation of the stack shift based
                # on propagation time across the whole network

                # read data
                mig_dict, delta = read_data_compatible_with_time_dict(
                    mig_files, time_grids, start_time_migration,
                    end_time_migration)

                # do migration
                do_migration_loop_continuous(opdict,
                                             mig_dict,
                                             delta,
                                             start_time_migration,
                                             grid_info,
                                             time_grids,
                                             keep_grid=True)

                # plot
                plotLocationGrid(loc, grid_info, figdir,
                                 opdict['plot_otime_window'])

            if plot_wfm:

                logging.info('Plotting waveforms for location %s' %
                             o_time.isoformat())

                # read data
                data_dict, delta = read_data_compatible_with_time_dict(
                    data_files, time_grids, start_time_migration,
                    end_time_migration)
                mig_dict, delta = read_data_compatible_with_time_dict(
                    mig_files, time_grids, start_time_migration,
                    end_time_migration)

                # number of samples in the plotted window (builtin int
                # replaces the removed np.int alias)
                n_window = int(
                    np.round((opdict['plot_tbefore'] + opdict['plot_tafter']) /
                             delta))

                # cut desired portion out of data, shifted by the travel time
                # to each station
                for sta in list(data_dict.keys()):
                    tmp = data_dict[sta]
                    istart = int(
                        np.round(
                            (start_time + ttimes[sta] - start_time_migration) /
                            delta))
                    iend = istart + n_window
                    # sanity check in case event is close to start or end of
                    # data
                    if istart < 0:
                        istart = 0
                    if iend > len(tmp):
                        iend = len(tmp)
                    data_dict[sta] = tmp[istart:iend]
                    # do slice
                    tmp = mig_dict[sta]
                    mig_dict[sta] = tmp[istart:iend]

                # retrieve relevant portion of stack max
                istart = int(
                    np.round(
                        (o_time - opdict['plot_tbefore'] - stack_start_time) /
                        delta))
                iend = istart + n_window
                # sanity check in case event is close to start or end of data
                if istart < 0:
                    # the window starts before the stack does: move the
                    # plotted start time forward by the missing samples
                    # (uses delta, the sample interval the index was
                    # computed with; no variable named dt exists here)
                    start_time = start_time + np.abs(istart) * delta
                    istart = 0
                if iend > len(max_val):
                    iend = len(max_val)
                # do slice
                stack_wfm = max_val[istart:iend]

                # plot
                plotLocationWaveforms(loc, start_time, delta, data_dict,
                                      mig_dict, stack_wfm, figdir)
示例#8
0
def do_double_diff_setup_and_run(opdict):
    """
    Do double difference (outer routine). Takes options from a
    WavelocOptions.opdict dictionary.

    Locations, correlation values, time delays and clusters are read from
    the ``loc`` directory under ``<base_path>/out/<outdir>``. For every
    cluster, coordinates further than 0.75 from the cluster centroid are
    replaced by the centroid value, and clusters with more than two events
    are relocated with ``do_double_diff``. When ``dd_loc`` is true the
    (re)located events of every cluster are written to ``relocations.dat``
    in the same directory, with all uncertainties set to 0. When ``verbose``
    is true ``plot_events`` is called for every cluster.

    :param opdict: Dictionary of parameters and options. Keys read here are
        ``base_path``, ``verbose``, ``dd_loc``, ``stations``, ``outdir``,
        ``search_grid``, ``nbsta``, ``clus``, ``xcorr_corr`` and
        ``xcorr_delay``.
    """

    base_path = opdict['base_path']
    verbose = opdict['verbose']
    dd_loc = opdict['dd_loc']

    # Station
    stations_filename = os.path.join(base_path, 'lib', opdict['stations'])
    stations = read_stations_file(stations_filename)

    # Location file
    locdir = os.path.join(base_path, 'out', opdict['outdir'], 'loc')
    loc_filename = os.path.join(locdir, 'locations.dat')
    locs = read_locs_from_file(loc_filename)
    opdict = read_header_from_file(loc_filename, opdict)

    # ------------------------------------------------------------------------
    # search grid
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])
    # traveltimes grid
    grid_info = read_hdr_file(search_grid_filename)
    time_grids = get_interpolated_time_grids(opdict)

    # Extract the UTM coordinates of the area of study
    xstart = grid_info['x_orig']
    xend = xstart+grid_info['nx']*grid_info['dx']
    ystart = grid_info['y_orig']
    yend = ystart+grid_info['ny']*grid_info['dy']
    zend = -grid_info['z_orig']
    zstart = -(-zend+grid_info['nz']*grid_info['dz'])
    area = [xstart, xend, ystart, yend, zstart, zend]

    # ------------------------------------------------------------------------
    nbmin = int(opdict['nbsta'])
    threshold = float(opdict['clus'])

    # Correlation,  time delay and cluster files
    corr_file = os.path.join(locdir, opdict['xcorr_corr'])
    cfile = BinaryFile(corr_file)
    coeff = cfile.read_binary_file()

    delay_file = os.path.join(locdir, opdict['xcorr_delay'])
    dfile = BinaryFile(delay_file)
    delay = dfile.read_binary_file()

    cluster_file = os.path.join(locdir, 'cluster-%s-%s' % (str(threshold),
                                                           str(nbmin)))
    clfile = BinaryFile(cluster_file)
    cluster = clfile.read_binary_file()

    # ------------------------------------------------------------------------
    # Input parameters
    len_cluster_min = 2

    # The relocation file only exists when dd_loc is set; try/finally makes
    # sure it is closed even if a cluster fails.
    new_loc_file = None
    if dd_loc:
        new_loc_filename = os.path.join(locdir, 'relocations.dat')
        new_loc_file = open(new_loc_filename, 'w')

    try:
        if dd_loc:
            write_header_options(new_loc_file, opdict)

        # --------------------------------------------------------------------
        # Iterate over clusters
        for i in cluster.keys():
            # single-argument form prints the same text under Python 2 and 3
            print("%s %s %s" % ("CLUSTER %d:" % i, cluster[i],
                                len(cluster[i])))
            N = len(cluster[i])

            # Hypocentral parameters to be changed
            x, y, z, z_ph, to = coord_cluster(cluster[i], locs)

            # Replace bad locations by the centroid coordinates
            centroid_x = np.mean(x)
            centroid_y = np.mean(y)
            centroid_z = np.mean(z)

            for ii in range(N):
                if np.abs(x[ii]-centroid_x) > .75:
                    x[ii] = centroid_x
                if np.abs(y[ii]-centroid_y) > .75:
                    y[ii] = centroid_y
                if np.abs(z[ii]-centroid_z) > .75:
                    z[ii] = centroid_z

            if N > len_cluster_min:
                # Theroretical traveltimes and arrival times
                t_th, arr_times = traveltimes(x, y, z, to, stations,
                                              time_grids)
                # do double difference location
                x, y, z, to = do_double_diff(x, y, z, to, stations, coeff,
                                             delay, cluster[i], threshold,
                                             t_th, arr_times)

            if verbose:
                from clustering import compute_nbsta
                nbsta = compute_nbsta(len(locs), coeff, threshold)
                plot_events(cluster, locs, stations, x, y, z, i, threshold,
                            nbmin, area, nbsta)

            # Updating the locations and writing them out both belong to
            # the dd_loc option: the output file is only open in that case.
            if dd_loc:
                # cluster entries are 1-based event numbers into locs
                for ind, j in enumerate(cluster[i]):
                    loc = locs[j-1]
                    loc['x_mean'] = x[ind]
                    loc['y_mean'] = y[ind]
                    loc['z_mean'] = z[ind]
                    loc['o_time'] = to[ind]
                    loc['x_sigma'] = 0
                    loc['y_sigma'] = 0
                    loc['z_sigma'] = 0
                    loc['o_err_right'] = 0
                    loc['o_err_left'] = 0
                    # single-space layout, same as the other location
                    # writers (no run of blanks after "pm")
                    new_loc_file.write(
                        "Max = %.2f, %s - %.2f s + %.2f s, x= %.4f pm "
                        "%.4f km, y= %.4f pm %.4f km, z= %.4f pm %.4f km\n" %
                        (loc['max_trig'], loc['o_time'].isoformat(),
                         loc['o_err_left'], loc['o_err_right'],
                         loc['x_mean'], loc['x_sigma'],
                         loc['y_mean'], loc['y_sigma'],
                         loc['z_mean'], loc['z_sigma']))
    finally:
        if new_loc_file is not None:
            new_loc_file.close()
示例#9
0
def _locate_event_prob(opdict, loc, grad_files, time_grids,
                       search_grid_filename, f_marginals, f_prob):
    """
    Relocate one event probabilistically and append it to both output files.

    :param opdict: Dictionary of waveloc options
    :param loc: Location dictionary of the event; ``o_time``, ``o_err_left``
        and ``o_err_right`` are read
    :param grad_files: Files handed to read_data_compatible_with_time_dict
    :param time_grids: Time grids from get_interpolated_time_grids
    :param search_grid_filename: Header file of the search grid
    :param f_marginals: Open h5py file; receives one group for this event
    :param f_prob: Open text file; receives one line for this event
    """
    space_only = opdict['probloc_spaceonly']

    # time window of interest around the origin time
    o_time = loc['o_time']
    if space_only:
        start_time = o_time
        end_time = o_time
    else:
        start_time = o_time - 3 * loc['o_err_left']
        end_time = o_time + 3 * loc['o_err_right']

    # make a buffer for migration
    start_time_migration = start_time - 10.0
    end_time_migration = end_time + 10.0

    # re-read grid info to ensure clean copy
    grid_info = read_hdr_file(search_grid_filename)

    # read data
    grad_dict, delta = read_data_compatible_with_time_dict(
        grad_files, time_grids, start_time_migration, end_time_migration)

    # do migration (all metadata on grid is added to grid_info)
    do_migration_loop_continuous(opdict, grad_dict, delta,
                                 start_time_migration, grid_info, time_grids,
                                 keep_grid=True)

    # get required info
    grid_starttime = grid_info['start_time']
    nx, ny, nz, _ = grid_info['grid_shape']
    dx, dy, dz, dt = grid_info['grid_spacing']
    x_orig, y_orig, z_orig = grid_info['grid_orig']

    # we are only interested in the time around the origin time of the event
    # (plain int() replaces np.int, which recent NumPy no longer provides)
    it_left = int(np.round((start_time - grid_starttime) / dt))
    it_right = int(np.round((end_time - grid_starttime) / dt))
    it_true = int(np.round((o_time - grid_starttime) / dt))
    nt = (it_right - it_left) + 1

    # set up integration axes (wrt reference)
    x = np.arange(nx) * dx
    y = np.arange(ny) * dy
    z = np.arange(nz) * dz
    if not space_only:
        t = np.arange(nt) * dt

    # extract the portion of interest (copy data); the grid file is closed
    # even if the extraction fails
    f = h5py.File(grid_info['dat_file'], 'r')
    try:
        stack_grid = f['stack_grid']
        if space_only:
            stack = np.empty((nx, ny, nz))
            stack[:] = stack_grid[:, it_true].reshape(nx, ny, nz)
        else:
            stack = np.empty((nx, ny, nz, nt))
            stack[:] = stack_grid[:, it_left:it_right + 1].reshape(
                nx, ny, nz, nt)
    finally:
        f.close()

    # expected values and uncertainties (from the covariance matrix)
    if space_only:
        exp_x, exp_y, exp_z, cov_matrix, prob_dict = \
            compute_expected_coordinates3D(stack, x, y, z,
                                           return_2Dgrids=True)
        exp_t = o_time
        sig_x, sig_y, sig_z = np.sqrt(np.diagonal(cov_matrix))
        sig_t = (loc['o_err_left'] + loc['o_err_right']) / 2.
    else:
        exp_x, exp_y, exp_z, exp_t, cov_matrix, prob_dict = \
            compute_expected_coordinates4D(stack, x, y, z, t,
                                           return_2Dgrids=True)
        exp_t = start_time + exp_t
        sig_x, sig_y, sig_z, sig_t = np.sqrt(np.diagonal(cov_matrix))

    # put reference location back
    exp_x = exp_x + x_orig
    exp_y = exp_y + y_orig
    exp_z = exp_z + z_orig

    # save the marginals : each event becomes a group in f_marginals
    grp = f_marginals.create_group(exp_t.isoformat())
    grp.create_dataset('x', data=x + x_orig)
    grp.create_dataset('y', data=y + y_orig)
    grp.create_dataset('z', data=z + z_orig)
    grp.create_dataset('prob_x', data=prob_dict['prob_x0'])
    grp.create_dataset('prob_y', data=prob_dict['prob_x1'])
    grp.create_dataset('prob_z', data=prob_dict['prob_x2'])
    grp.create_dataset('prob_xy', data=prob_dict['prob_x0_x1'])
    grp.create_dataset('prob_xz', data=prob_dict['prob_x0_x2'])
    grp.create_dataset('prob_yz', data=prob_dict['prob_x1_x2'])
    if not space_only:
        # time axis is stored relative to the original origin time
        grp.create_dataset('t', data=t - (o_time - start_time))
        grp.create_dataset('prob_t', data=prob_dict['prob_x3'])
        grp.create_dataset('prob_xt', data=prob_dict['prob_x0_x3'])
        grp.create_dataset('prob_yt', data=prob_dict['prob_x1_x3'])
        grp.create_dataset('prob_zt', data=prob_dict['prob_x2_x3'])

    # write the expected values to the plain text locations file
    f_prob.write(
        "PROB DENSITY : T = %s s pm %.2f s, x= %.4f pm %.4f km, "
        "y= %.4f pm %.4f km, z= %.4f pm %.4f km\n" %
        (exp_t.isoformat(), sig_t, exp_x, sig_x, exp_y, sig_y, exp_z, sig_z))


def do_locations_prob_setup_and_run(opdict):
    """
    Do probabilistic location (outer routine). Takes options from a
    waveloc options dictionary.

    Every event of ``locations.dat`` is re-migrated on a short window around
    its origin time; the expected coordinates, their uncertainties and the
    marginal probability densities are written to ``locations_prob.dat``
    (one text line per event) and ``locations_prob.hdf5`` (one group per
    event), both in the ``loc`` output directory. If ``locations.dat`` does
    not exist, do_locations_trigger_setup_and_run is run first.

    :param opdict: Dictionary of parameters and options
    """

    # get / set info
    base_path = opdict['base_path']

    locdir = os.path.join(base_path, 'out', opdict['outdir'], 'loc')
    locfile = os.path.join(locdir, 'locations.dat')
    locfile_prob = os.path.join(locdir, 'locations_prob.dat')
    locfile_hdf5 = os.path.join(locdir, 'locations_prob.hdf5')

    # if locfile does not exist then make it by running trigger location
    if not os.path.exists(locfile):
        logging.info(
            'No location found at %s.  Running trigger location first...' %
            locfile)
        do_locations_trigger_setup_and_run(opdict)

    # data files : only the gradient traces are migrated here
    data_dir = os.path.join(base_path, 'data', opdict['datadir'])
    grad_files = sorted(glob.glob(os.path.join(data_dir,
                                               opdict['gradglob'])))

    # stations
    # NOTE(review): the result is not used in this routine; the call is kept
    # so that a missing stations file still fails before any migration -
    # confirm whether it can be dropped.
    read_stations_file(os.path.join(base_path, 'lib', opdict['stations']))

    # grids
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])

    # read time grid information
    time_grids = get_interpolated_time_grids(opdict)

    # read locations
    locs = read_locs_from_file(locfile)

    # Output files are opened only once all inputs are read, and are closed
    # whatever happens while iterating over the locations.
    f_marginals = h5py.File(locfile_hdf5, 'w')
    try:
        with open(locfile_prob, 'w') as f_prob:
            for loc in locs:
                _locate_event_prob(opdict, loc, grad_files, time_grids,
                                   search_grid_filename, f_marginals, f_prob)
    finally:
        f_marginals.close()
示例#10
0
def do_migration_setup_and_run(opdict):
    """
    Do migration (outer routine). Takes options from a waveloc options
    dictionary.

    The period ``starttime`` - ``endtime`` is cut into windows of
    ``data_length`` seconds that overlap by ``data_overlap`` seconds; each
    window is read and, if at least 3 traces are available, migrated with
    do_migration_loop_continuous.

    :param opdict: Dictionary of parameters and options
    :raises UserWarning: if no data file matches the selected glob
    """

    base_path = opdict['base_path']
    runtime = opdict['time']
    reloc = opdict['reloc']

    # stations
    # NOTE(review): the result is not used in this routine; the call is kept
    # so that a missing stations file still fails before the time loop -
    # confirm whether it can be dropped.
    read_stations_file(os.path.join(base_path, 'lib', opdict['stations']))

    # data : which traces are migrated depends on the processing options
    data_dir = os.path.join(base_path, 'data', opdict['datadir'])
    if opdict['kderiv']:
        data_glob = opdict['gradglob']
        if opdict['gauss']:
            data_glob = opdict['gaussglob']
    else:
        data_glob = opdict['kurtglob']
    data_files = sorted(glob.glob(os.path.join(data_dir, data_glob)))
    if not data_files:
        msg = 'No data files found for %s and %s' % (data_dir, data_glob)
        logging.error(msg)
        raise UserWarning(msg)
    logging.debug('Number of data files : %d' % len(data_files))

    # traces used for the signal-to-noise selection (same files for every
    # time window, so they are listed once)
    if reloc:
        snr_files = glob.glob(os.path.join(data_dir, opdict['kurtglob']))

    # grids
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])
    time_grids = get_interpolated_time_grids(opdict)

    # start and end times
    data_length = opdict['data_length']
    data_overlap = opdict['data_overlap']

    start_time = utcdatetime.UTCDateTime(opdict['starttime'])
    end_time = start_time + data_length
    final_end_time = utcdatetime.UTCDateTime(opdict['endtime'])

    time_shift_secs = data_length - data_overlap

    ######### FOR EACH TIME SPAN - DO MIGRATION #############

    if runtime:
        t_ref = time()

    while start_time < final_end_time:

        # read data
        logging.info("Reading data  : %s - %s." %
                     (start_time.isoformat(), end_time.isoformat()))
        data, delta = read_data_compatible_with_time_dict(
            data_files, time_grids, start_time, end_time)

        if reloc:
            # Mute the stations whose signal-to-noise ratio is too low.
            # The second return value is ignored on purpose : `delta` must
            # stay the one that belongs to the traces being migrated.
            traces, _ = read_data_compatible_with_time_dict(
                snr_files, time_grids, start_time, end_time)
            for staname in sorted(traces):
                if staname not in data:
                    # nothing to mute for this station in this window
                    continue
                snr = np.max(traces[staname]) / np.mean(np.abs(
                    traces[staname]))
                if snr < opdict['reloc_snr']:
                    data[staname] = np.zeros(len(data[staname]))

        # re-read grid_info at each iteration to make sure it is a clean copy
        grid_info = read_hdr_file(search_grid_filename)

        # do migration if have enough data (3 is bare minimum)
        n_traces = len(data)
        if n_traces >= 3:
            logging.info("Migrating data : %s - %s." %
                         (start_time.isoformat(), end_time.isoformat()))
            do_migration_loop_continuous(opdict, data, delta, start_time,
                                         grid_info, time_grids)
        elif n_traces == 0:
            logging.warning('No data found between %s and %s.' %
                            (start_time.isoformat(), end_time.isoformat()))
        else:
            logging.warning('Insufficient data found between %s and %s.' %
                            (start_time.isoformat(), end_time.isoformat()))

        # Reset the start and end times to loop again
        start_time = start_time + time_shift_secs
        end_time = end_time + time_shift_secs

    if runtime:
        t = time() - t_ref
        logging.info("Time for migrating all time slices : %.2f s\n" % (t))
示例#11
0
def _snap_outliers_to_centroid(values, max_dev=.75):
    """
    Replace, in place, every entry of values that is further than max_dev
    from the mean of values by that mean.
    """
    centroid = np.mean(values)
    for k in range(len(values)):
        if np.abs(values[k] - centroid) > max_dev:
            values[k] = centroid


def _write_relocated_events(new_loc_file, locs, members, x, y, z, to):
    """
    Store the relocated hypocentres of one cluster in locs and write one
    line per event to new_loc_file.
    """
    for ind, j in enumerate(members):
        loc = locs[j - 1]
        loc['x_mean'] = x[ind]
        loc['y_mean'] = y[ind]
        loc['z_mean'] = z[ind]
        loc['o_time'] = to[ind]
        # uncertainties are reset to 0 for relocated events
        loc['x_sigma'] = 0
        loc['y_sigma'] = 0
        loc['z_sigma'] = 0
        loc['o_err_right'] = 0
        loc['o_err_left'] = 0
        # The format is built from adjacent literals : a backslash
        # continuation inside the string would embed the source indentation
        # in every output line.
        new_loc_file.write(
            "Max = %.2f, %s - %.2f s + %.2f s, x= %.4f pm "
            "%.4f km, y= %.4f pm %.4f km, z= %.4f pm %.4f km\n" %
            (loc['max_trig'], loc['o_time'].isoformat(),
             loc['o_err_left'], loc['o_err_right'],
             loc['x_mean'], loc['x_sigma'],
             loc['y_mean'], loc['y_sigma'],
             loc['z_mean'], loc['z_sigma']))


def do_double_diff_setup_and_run(opdict):
    """
    Do double difference (outer routine). Takes options from a
    WavelocOptions.opdict dictionary.

    For every cluster, locations lying more than 0.75 away from the cluster
    centroid are moved to the centroid, clusters of more than 2 events are
    relocated with do_double_diff, and - if the ``dd_loc`` option is set -
    the new locations are written to ``relocations.dat`` in the ``loc``
    output directory. With ``dd_loc`` unset nothing is written and the
    location dictionaries are left untouched.

    :param opdict: Dictionary of parameters and options
    """

    base_path = opdict['base_path']
    verbose = opdict['verbose']
    dd_loc = opdict['dd_loc']

    # Station
    stations_filename = os.path.join(base_path, 'lib', opdict['stations'])
    stations = read_stations_file(stations_filename)

    # Location file
    locdir = os.path.join(base_path, 'out', opdict['outdir'], 'loc')
    loc_filename = os.path.join(locdir, 'locations.dat')
    locs = read_locs_from_file(loc_filename)
    opdict = read_header_from_file(loc_filename, opdict)

    # ------------------------------------------------------------------------
    # search grid
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])
    # traveltimes grid
    grid_info = read_hdr_file(search_grid_filename)
    time_grids = get_interpolated_time_grids(opdict)

    # Extract the UTM coordinates of the area of study
    xstart = grid_info['x_orig']
    xend = xstart + grid_info['nx'] * grid_info['dx']
    ystart = grid_info['y_orig']
    yend = ystart + grid_info['ny'] * grid_info['dy']
    zend = -grid_info['z_orig']
    zstart = -(-zend + grid_info['nz'] * grid_info['dz'])
    area = [xstart, xend, ystart, yend, zstart, zend]

    # ------------------------------------------------------------------------
    nbmin = int(opdict['nbsta'])
    threshold = float(opdict['clus'])

    # Correlation,  time delay and cluster files
    coeff = BinaryFile(
        os.path.join(locdir, opdict['xcorr_corr'])).read_binary_file()
    delay = BinaryFile(
        os.path.join(locdir, opdict['xcorr_delay'])).read_binary_file()
    cluster_file = os.path.join(locdir,
                                'cluster-%s-%s' % (str(threshold), str(nbmin)))
    cluster = BinaryFile(cluster_file).read_binary_file()

    # ------------------------------------------------------------------------
    # Input parameters
    len_cluster_min = 2

    new_loc_file = None
    if dd_loc:
        new_loc_file = open(os.path.join(locdir, 'relocations.dat'), 'w')

    # The relocation file is closed whatever happens during the relocation.
    try:
        if dd_loc:
            write_header_options(new_loc_file, opdict)

        # --------------------------------------------------------------------
        # Iterate over clusters
        for i in cluster:
            members = cluster[i]
            print("CLUSTER %d: %s %s" % (i, members, len(members)))

            # Hypocentral parameters to be changed
            x, y, z, z_ph, to = coord_cluster(members, locs)

            # Replace bad locations by the centroid coordinates
            for coords in (x, y, z):
                _snap_outliers_to_centroid(coords)

            if len(members) > len_cluster_min:
                # Theoretical traveltimes and arrival times
                t_th, arr_times = traveltimes(x, y, z, to, stations,
                                              time_grids)
                # do double difference location
                x, y, z, to = do_double_diff(x, y, z, to, stations, coeff,
                                             delay, members, threshold, t_th,
                                             arr_times)

            if verbose:
                from clustering import compute_nbsta
                nbsta = compute_nbsta(len(locs), coeff, threshold)
                plot_events(cluster, locs, stations, x, y, z, i, threshold,
                            nbmin, area, nbsta)

            # Updating and writing belong together under dd_loc : without
            # the option there is no output file to write to.
            if dd_loc:
                _write_relocated_events(new_loc_file, locs, members,
                                        x, y, z, to)
    finally:
        if new_loc_file is not None:
            new_loc_file.close()
示例#12
0
def do_migration_setup_and_run(opdict):
    """
    Do migration (outer routine). Takes options from a waveloc options
    dictionary.

    The period ``starttime`` - ``endtime`` is cut into windows of
    ``data_length`` seconds that overlap by ``data_overlap`` seconds; each
    window is read and, if at least 3 traces are available, migrated with
    do_migration_loop_continuous.

    :param opdict: Dictionary of parameters and options
    :raises UserWarning: if no data file matches the selected glob
    """

    base_path = opdict['base_path']
    runtime = opdict['time']
    reloc = opdict['reloc']

    # stations
    # NOTE(review): the result is not used in this routine; the call is kept
    # so that a missing stations file still fails before the time loop -
    # confirm whether it can be dropped.
    read_stations_file(os.path.join(base_path, 'lib', opdict['stations']))

    # data : which traces are migrated depends on the processing options
    data_dir = os.path.join(base_path, 'data', opdict['datadir'])
    if opdict['kderiv']:
        data_glob = opdict['gradglob']
        if opdict['gauss']:
            data_glob = opdict['gaussglob']
    else:
        data_glob = opdict['kurtglob']
    data_files = sorted(glob.glob(os.path.join(data_dir, data_glob)))
    if not data_files:
        msg = 'No data files found for %s and %s' % (data_dir, data_glob)
        logging.error(msg)
        raise UserWarning(msg)
    logging.debug('Number of data files : %d' % len(data_files))

    # traces used for the signal-to-noise selection (same files for every
    # time window, so they are listed once)
    if reloc:
        snr_files = glob.glob(os.path.join(data_dir, opdict['kurtglob']))

    # grids
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])
    time_grids = get_interpolated_time_grids(opdict)

    # start and end times
    data_length = opdict['data_length']
    data_overlap = opdict['data_overlap']

    start_time = utcdatetime.UTCDateTime(opdict['starttime'])
    end_time = start_time + data_length
    final_end_time = utcdatetime.UTCDateTime(opdict['endtime'])

    time_shift_secs = data_length - data_overlap

    ######### FOR EACH TIME SPAN - DO MIGRATION #############

    if runtime:
        t_ref = time()

    while start_time < final_end_time:

        # read data
        logging.info("Reading data  : %s - %s." %
                     (start_time.isoformat(), end_time.isoformat()))
        data, delta = read_data_compatible_with_time_dict(
            data_files, time_grids, start_time, end_time)

        if reloc:
            # Mute the stations whose signal-to-noise ratio is too low.
            # The second return value is ignored on purpose : `delta` must
            # stay the one that belongs to the traces being migrated.
            traces, _ = read_data_compatible_with_time_dict(
                snr_files, time_grids, start_time, end_time)
            for staname in sorted(traces):
                if staname not in data:
                    # nothing to mute for this station in this window
                    continue
                snr = np.max(traces[staname]) / np.mean(np.abs(
                    traces[staname]))
                if snr < opdict['reloc_snr']:
                    data[staname] = np.zeros(len(data[staname]))

        # re-read grid_info at each iteration to make sure it is a clean copy
        grid_info = read_hdr_file(search_grid_filename)

        # do migration if have enough data (3 is bare minimum)
        n_traces = len(data)
        if n_traces >= 3:
            logging.info("Migrating data : %s - %s." %
                         (start_time.isoformat(), end_time.isoformat()))
            do_migration_loop_continuous(opdict, data, delta, start_time,
                                         grid_info, time_grids)
        elif n_traces == 0:
            logging.warning('No data found between %s and %s.' %
                            (start_time.isoformat(), end_time.isoformat()))
        else:
            logging.warning('Insufficient data found between %s and %s.' %
                            (start_time.isoformat(), end_time.isoformat()))

        # Reset the start and end times to loop again
        start_time = start_time + time_shift_secs
        end_time = end_time + time_shift_secs

    if runtime:
        t = time() - t_ref
        logging.info("Time for migrating all time slices : %.2f s\n" % (t))
示例#13
0
def do_clustering_setup_and_run(opdict):
    """
    Do clustering (outer routine). Takes options from a waveloc options
    dictionary.

    The correlation values are read from the ``xcorr_corr`` file, the
    clusters returned by do_clustering are written to the binary file
    ``cluster-<threshold>-<nbmin>`` in the ``loc`` output directory and, if
    the ``verbose`` option is set, the waveforms are plotted.

    :param opdict: Dictionary of parameters and options
    :raises Exception: if ``nbsta`` is larger than the number of entries of
        the correlation dictionary
    """

    base_path = opdict['base_path']
    verbose = opdict['verbose']

    # stations
    stations_filename = os.path.join(base_path, 'lib', opdict['stations'])

    # data
    data_dir = os.path.join(base_path, 'data', opdict['datadir'])
    data_files = sorted(glob.glob(os.path.join(data_dir,
                                               opdict['dataglob'])))

    # location file
    locdir = os.path.join(base_path, 'out', opdict['outdir'], 'loc')
    loc_filename = os.path.join(locdir, 'locations.dat')

    # correlation values
    coeff = BinaryFile(
        os.path.join(locdir, opdict['xcorr_corr'])).read_binary_file()

    # file containing time delays
    delay_file = os.path.join(locdir, opdict['xcorr_delay'])

    # INPUT PARAMETERS
    nbmin = int(opdict['nbsta'])
    if nbmin > len(coeff):
        raise Exception(
            'the minimum number of stations cannot be > to the number of stations !!'
        )
    # length of the first entry of the correlation dictionary
    # (list() keeps this working where values() is not indexable)
    event = len(list(coeff.values())[0])
    tplot = float(opdict['clus'])  # threshold for which we save and plot
    cluster_file = "%s/cluster-%s-%s" % (locdir, str(tplot), str(nbmin))

    # a single threshold by default; np.arange(0, 1.1, 0.1) scans a range
    corr = [opdict['clus']]
    for threshold in corr:
        threshold = float(threshold)
        nbsta = compute_nbsta(event, coeff, threshold)

        CLUSTER = do_clustering(event, nbsta, nbmin)

        if threshold != tplot:
            continue

        # Single-argument print() calls behave the same as the former
        # print statements and produce the same text.
        print("----------------------------------------------")
        print("THRESHOLD :  %s  # STATIONS :  %s" % (threshold, nbmin))
        print("# CLUSTERS :  %s" % len(CLUSTER))
        print(CLUSTER)

        BinaryFile(cluster_file).write_binary_file(CLUSTER)
        print("Written in %s" % cluster_file)

        if verbose:  # PLOT
            # Read location file
            locs = read_locs_from_file(loc_filename)
            # Read station file
            stations = read_stations_file(stations_filename)

            # Look at the waveforms
            plot_traces(CLUSTER, delay_file, coeff, locs, stations,
                        data_dir, data_files, threshold)
示例#14
0
def generateSyntheticDirac(opdict, time_grids=None):
    """
    Create a synthetic data set, migrate it and save the resulting grids.

    One trace is built per station that has travel-time information : zeros
    (or uniform random noise if ``syn_addnoise`` is set) with a linearly
    decreasing ramp of ``syn_kwidth`` seconds starting at the arrival time
    predicted for the grid point (``syn_ix``, ``syn_iy``, ``syn_iz``) and the
    origin time ``syn_otime``. The traces are migrated into an hdf5 grid
    file, the maxima are extracted into an hdf5 stack file, and the
    description of the test is written to a ``.info`` text file.

    :param opdict: Dictionary of waveloc options
    :param time_grids: Time grids to use; read with
        get_interpolated_time_grids(opdict) if None
    :returns: Dictionary with the keys ``dat_file``, ``stack_file``,
        ``grid_shape``, ``grid_spacing``, ``grid_orig``, ``true_indexes`` and
        ``start_time``
    """

    from NllGridLib import read_stations_file, read_hdr_file
    from migration import migrate_4D_stack, extract_max_values
    from hdf5_grids import get_interpolated_time_grids

    load_time_grids = time_grids is None

    # define length and sampling frequency of synthetic data
    s_amplitude = opdict['syn_amplitude']
    s_data_length = opdict['syn_datalength']
    s_sample_freq = opdict['syn_samplefreq']
    s_filename = opdict['syn_filename']

    s_npts = int(s_data_length * s_sample_freq)
    # 1.0 forces a true division : with an integer sampling frequency
    # 1/s_sample_freq is 0 under Python 2
    s_delta = 1.0 / s_sample_freq
    s_kwidth = opdict['syn_kwidth']
    s_nkwidth = int(round(s_kwidth * s_sample_freq))

    # define origin time
    s_t0 = opdict['syn_otime']

    base_path = opdict['base_path']
    out_path = os.path.join(base_path, 'out', opdict['outdir'])
    test_grid_file = os.path.join(out_path, 'grid', s_filename)
    test_stack_file = os.path.join(out_path, 'stack',
                                   'stack_all_' + s_filename)
    test_info_file = os.path.join(out_path, 'grid', '%s.info' % s_filename)

    # get filenames for search grid and stations
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])
    stations_filename = os.path.join(base_path, 'lib', opdict['stations'])
    stations = read_stations_file(stations_filename)

    if 'sta_list' in opdict:
        sta_list = opdict['sta_list'].split(',')
    else:
        sta_list = list(stations.keys())

    # get parameters for noise etc
    syn_addnoise = opdict['syn_addnoise']

    #################################
    # start setting up synthetic data
    #################################

    grid_info = read_hdr_file(search_grid_filename)

    if load_time_grids:
        time_grids = get_interpolated_time_grids(opdict)

    #################################
    # create synthetic data
    #################################

    # choose hypocenter
    nx = grid_info['nx']
    ny = grid_info['ny']
    nz = grid_info['nz']

    dx = grid_info['dx']
    dy = grid_info['dy']
    dz = grid_info['dz']

    x_orig = grid_info['x_orig']
    y_orig = grid_info['y_orig']
    z_orig = grid_info['z_orig']

    ix = opdict['syn_ix']
    iy = opdict['syn_iy']
    iz = opdict['syn_iz']
    it = int(round(s_t0 / s_delta))

    # retrieve travel times for chosen hypocenter
    # and station list
    ib = ix * ny * nz + iy * nz + iz
    n_buf = nx * ny * nz
    logging.debug('ib for true hypocenter = %d' % ib)
    ttimes = {}
    for sta in sta_list:
        if sta in time_grids:
            ttimes[sta] = time_grids[sta].grid_data[ib]
        else:
            logging.info('Missing travel-time information for station %s. Ignoring station...' % sta)
    logging.debug('Travel-times for true hypocenter = %s' % ttimes)

    # construct data with these travel times
    data = {}
    for key, delay in ttimes.items():
        if syn_addnoise:
            s_snr = opdict['syn_snr']
            s = np.random.rand(s_npts) * s_amplitude / s_snr
        else:
            s = np.zeros(s_npts)
        atime = s_t0 + delay
        # plain int() replaces np.int, which recent NumPy no longer provides
        i_atime = int(atime / s_delta)
        if i_atime + s_nkwidth > len(s):
            # only reported here : the assignment below is left to fail if
            # the ramp does not fit in the trace
            logging.error('syn_datalength is too small compared with geographical size of network ')
        s[i_atime:i_atime + s_nkwidth] = \
            s_amplitude - np.arange(s_nkwidth) * (s_amplitude /
                                                  float(s_nkwidth))
        data[key] = s

    # DO MIGRATION

    logging.info('Doing migration to %s' % test_grid_file)
    # both hdf5 files are closed even if the migration or extraction fails
    f = h5py.File(test_grid_file, 'w')
    try:
        stack_grid = f.create_dataset('stack_grid', (n_buf, s_npts), 'f',
                                      chunks=(1, s_npts))
        stack_shift_time = migrate_4D_stack(data, s_delta, time_grids,
                                            stack_grid)
        _, nt = stack_grid.shape

        # add useful information to dataset
        for key, value in grid_info.items():
            stack_grid.attrs[key] = value
        stack_grid.attrs['dt'] = s_delta
        stack_grid.attrs['start_time'] = -stack_shift_time

        # extract max-stack
        logging.info('Extracting max_val etc. to %s' % test_stack_file)
        f_stack = h5py.File(test_stack_file, 'w')
        try:
            # extract maxima
            extract_max_values(stack_grid, grid_info, f_stack)
            for name in f_stack:
                dset = f_stack[name]
                logging.debug('After extract_max_values : %s %f %f' %
                              (name, np.max(dset), np.sum(dset)))
                dset.attrs['start_time'] = -stack_shift_time
                dset.attrs['dt'] = s_delta
        finally:
            f_stack.close()
    finally:
        f.close()
    logging.info('Saved 4D grid to file %s' % test_grid_file)

    shifted_it = it + int(round(stack_shift_time / s_delta))

    # SETUP information to pass back
    test_info = {}
    test_info['dat_file'] = test_grid_file
    test_info['stack_file'] = test_stack_file
    test_info['grid_shape'] = nx, ny, nz, nt
    test_info['grid_spacing'] = dx, dy, dz, s_delta
    test_info['grid_orig'] = x_orig, y_orig, z_orig
    test_info['true_indexes'] = (ix, iy, iz, shifted_it)
    test_info['start_time'] = -stack_shift_time

    logging.debug(test_info)
    # the info file is closed (hence flushed) before returning
    with open(test_info_file, 'w') as f_info:
        f_info.write(str(test_info))

    return test_info
示例#15
0
def do_clustering_setup_and_run(opdict):
    """
    Does clustering by applying the depth first search algorithm and saves
    the result (= a dictionary containing the event indexes forming each
    cluster) in a binary file.
    Needs to define the correlation value threshold and the minimum number of
    stations where this threshold should be reached to form a cluster (should
    be done in the options dictionary)

    :param opdict: Dictionary of waveloc options
    :raises Exception: if ``nbsta`` is larger than the number of entries of
        the correlation dictionary

    """

    base_path = opdict['base_path']
    verbose = opdict['verbose']

    # stations
    stations_filename = os.path.join(base_path, 'lib', opdict['stations'])

    # location file
    locdir = os.path.join(base_path, 'out', opdict['outdir'], 'loc')
    loc_filename = os.path.join(locdir, 'locations.dat')

    # correlation values
    coeff = BinaryFile(
        os.path.join(locdir, opdict['xcorr_corr'])).read_binary_file()

    # INPUT PARAMETERS
    nbmin = int(opdict['nbsta'])
    if nbmin > len(coeff):
        # adjacent literals : a backslash continuation inside the string
        # would embed the source indentation in the message
        raise Exception('the minimum number of stations cannot be > to the '
                        'number of stations !!')
    # length of the first entry of the correlation dictionary
    # (list() keeps this working where values() is not indexable)
    event = len(list(coeff.values())[0])
    tplot = float(opdict['clus'])  # threshold for which we save and plot
    cluster_file = "%s/cluster-%s-%s" % (locdir, str(tplot), str(nbmin))

    # a single threshold by default; np.arange(0, 1.1, 0.1) scans a range
    corr = [opdict['clus']]
    for threshold in corr:
        threshold = float(threshold)
        nbsta = compute_nbsta(event, coeff, threshold)

        CLUSTER = do_clustering(event, nbsta, nbmin)

        if threshold != tplot:
            continue

        # Single-argument print() calls behave the same as the former
        # print statements and produce the same text.
        print("----------------------------------------------")
        print("THRESHOLD :  %s  # STATIONS :  %s" % (threshold, nbmin))
        print("# CLUSTERS :  %s" % len(CLUSTER))
        print(CLUSTER)

        BinaryFile(cluster_file).write_binary_file(CLUSTER)
        print("Written in %s" % cluster_file)

        if verbose:  # PLOT
            # Read location file
            locs = read_locs_from_file(loc_filename)
            # Read station file
            stations = read_stations_file(stations_filename)

            # Plot graphs
            plot_graphs(locs, stations, nbsta, CLUSTER, nbmin, threshold)
示例#16
0
def do_plotting_setup_and_run(opdict, plot_wfm=True, plot_grid=True):
    """
    Plot the location grid and/or the waveforms for every located event.

    Locations are read from ``out/<outdir>/loc/locations.dat`` and the
    stack maximum from the ``max_val_smooth`` dataset of
    ``out/<outdir>/stack/combined_stack_all.hdf5``. Figures are written by
    the plotting helpers into ``out/<outdir>/fig``.

    :param opdict: Options dictionary. Keys read directly here:
        ``base_path``, ``outdir``, ``datadir``, ``dataglob``, ``kurtglob``,
        ``kderiv``, ``gradglob`` (if ``kderiv``), ``gauss`` and
        ``gaussglob`` (if ``kderiv`` and ``gauss``), ``stations``,
        ``search_grid``, ``plot_tbefore``, ``plot_tafter`` and
        ``plot_otime_window`` (if ``plot_grid``). It is also passed whole
        to ``get_interpolated_time_grids`` and
        ``do_migration_loop_continuous``.
    :param plot_wfm: If true, plot data, migrated traces and stack maximum
        around each origin time.
    :param plot_grid: If true, re-run the migration for each location and
        plot the resulting grid.
    """

    # get / set info
    base_path = opdict['base_path']
    out_dir = os.path.join(base_path, 'out', opdict['outdir'])

    locfile = os.path.join(out_dir, 'loc', 'locations.dat')
    stackfile = os.path.join(out_dir, 'stack', 'combined_stack_all.hdf5')

    data_dir = os.path.join(base_path, 'data', opdict['datadir'])
    data_files = sorted(glob.glob(os.path.join(data_dir, opdict['dataglob'])))

    # Files used for migration: kurtosis by default, its gradient when
    # 'kderiv' is set, and the gaussian version when 'gauss' is set as well.
    mig_files = sorted(glob.glob(os.path.join(data_dir, opdict['kurtglob'])))
    if opdict['kderiv']:
        mig_files = sorted(
            glob.glob(os.path.join(data_dir, opdict['gradglob'])))
        if opdict['gauss']:
            mig_files = sorted(
                glob.glob(os.path.join(data_dir, opdict['gaussglob'])))

    figdir = os.path.join(out_dir, 'fig')

    # stations
    # NOTE(review): the result is not used below; the call is kept so that
    # a missing or unreadable station file still fails here as before.
    stations_filename = os.path.join(base_path, 'lib', opdict['stations'])
    read_stations_file(stations_filename)

    # grids
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])
    # read time grid information
    time_grids = get_interpolated_time_grids(opdict)

    # read locations
    locs = read_locs_from_file(locfile)

    # open stack file; closed in the finally clause even if plotting fails
    f_stack = h5py.File(stackfile, 'r')
    try:
        max_val = f_stack['max_val_smooth']
        stack_start_time = UTCDateTime(max_val.attrs['start_time'])

        for loc in locs:
            o_time = loc['o_time']
            t_before = opdict['plot_tbefore']
            t_after = opdict['plot_tafter']
            start_time = o_time - t_before
            end_time = o_time + t_after

            # re-read grid info to ensure clean copy
            grid_info = read_hdr_file(search_grid_filename)

            x = loc['x_mean']
            y = loc['y_mean']
            z = loc['z_mean']

            # get the corresponding travel-times for time-shifting
            ttimes = {}
            for sta in time_grids:
                ttimes[sta] = time_grids[sta].value_at_point(x, y, z)

            # widen the read window by the largest travel-time on both sides
            tshift_migration = max(ttimes.values())
            start_time_migration = start_time - tshift_migration
            end_time_migration = end_time + tshift_migration

            if plot_grid:
                logging.info('Plotting grid for location %s',
                             o_time.isoformat())
                # TODO implement a rough estimation of the stack shift based
                # on propagation time across the whole network

                # read data
                mig_dict, delta = read_data_compatible_with_time_dict(
                    mig_files, time_grids,
                    start_time_migration, end_time_migration)

                # do migration
                do_migration_loop_continuous(
                    opdict, mig_dict, delta, start_time_migration,
                    grid_info, time_grids, keep_grid=True)

                # plot
                plotLocationGrid(loc, grid_info, figdir,
                                 opdict['plot_otime_window'])

            if plot_wfm:
                logging.info('Plotting waveforms for location %s',
                             o_time.isoformat())

                # read data
                data_dict, delta = read_data_compatible_with_time_dict(
                    data_files, time_grids,
                    start_time_migration, end_time_migration)
                mig_dict, delta = read_data_compatible_with_time_dict(
                    mig_files, time_grids,
                    start_time_migration, end_time_migration)

                # number of samples in the plotted window
                n_window = int(np.round((t_before + t_after) / delta))

                # cut desired portion out of data, shifting each station's
                # window by its travel-time from the location
                for sta in data_dict.keys():
                    tmp = data_dict[sta]
                    istart = int(np.round(
                        (start_time + ttimes[sta] - start_time_migration)
                        / delta))
                    iend = istart + n_window
                    # sanity check in case event is close to start or end
                    # of data
                    if istart < 0:
                        istart = 0
                    if iend > len(tmp):
                        iend = len(tmp)
                    data_dict[sta] = tmp[istart:iend]
                    # same slice for the migrated trace
                    mig_dict[sta] = mig_dict[sta][istart:iend]

                # retrieve relevant portion of stack max
                istart = int(np.round(
                    (o_time - t_before - stack_start_time) / delta))
                iend = istart + n_window
                # sanity check in case event is close to start or end of data
                if istart < 0:
                    # The stack begins after the requested window start:
                    # move the plot start time forward by the missing samples
                    # (uses the sampling interval 'delta').
                    start_time = start_time + np.abs(istart) * delta
                    istart = 0
                if iend > len(max_val):
                    iend = len(max_val)
                # do slice
                stack_wfm = max_val[istart:iend]

                # plot
                plotLocationWaveforms(loc, start_time, delta, data_dict,
                                      mig_dict, stack_wfm, figdir)
    finally:
        f_stack.close()