Пример #1
0
def pygeons_merge(input_files, output_stem=None):
    '''
    Merge several HDF5 data files into one HDF5 data file.

    The datasets are read with *dict_from_hdf5* and passed through
    *_common_context*. The first resulting dataset is then updated in
    place: wherever a later dataset has non-missing data, those values
    replace whatever the merged dataset held.

    Parameters
    ----------
    input_files : iterable of str
        Names of the files to merge. Later files take precedence over
        earlier ones.

    output_stem : str, optional
        Output file name without the '.h5' extension. Defaults to
        'merged'.
    '''
    logger.info('Running pygeons merge ...')
    datasets = _common_context([dict_from_hdf5(name) for name in input_files])
    merged = datasets[0]
    for other in datasets[1:]:
        for component in ('east', 'north', 'vertical'):
            sigma_key = component + '_std_dev'
            # an infinite standard deviation marks missing data
            present_in_other = ~np.isinf(other[sigma_key])
            present_in_merged = ~np.isinf(merged[sigma_key])
            if np.any(present_in_other & present_in_merged):
                warnings.warn(
                    'Data for some stations and times exist in multiple '
                    'datasets. Precedence is determined by the order the data '
                    'files were specified in.')

            # copy every non-missing entry of *other* into *merged*
            merged[component][present_in_other] = (
                other[component][present_in_other])
            merged[sigma_key][present_in_other] = (
                other[sigma_key][present_in_other])

    # fall back to the default output name when none was given
    stem = 'merged' if output_stem is None else output_stem
    output_file = stem + '.h5'
    hdf5_from_dict(output_file, merged)
    logger.info('Merged data written to %s' % output_file)
Пример #2
0
def pygeons_merge(input_files,output_stem=None):
  '''
  Merge several HDF5 data files into one HDF5 data file.

  The datasets are read with *dict_from_hdf5* and passed through
  *_common_context*. The first resulting dataset is then updated in
  place: wherever a later dataset has non-missing data, those values
  replace whatever the merged dataset held.

  Parameters
  ----------
  input_files : iterable of str
    Names of the files to merge. Later files take precedence over
    earlier ones.

  output_stem : str, optional
    Output file name without the '.h5' extension. Defaults to
    'merged'.
  '''
  logger.info('Running pygeons merge ...')
  datasets = _common_context([dict_from_hdf5(name) for name in input_files])
  merged = datasets[0]
  for other in datasets[1:]:
    for component in ('east','north','vertical'):
      sigma_key = component + '_std_dev'
      # an infinite standard deviation marks missing data
      present_in_other = ~np.isinf(other[sigma_key])
      present_in_merged = ~np.isinf(merged[sigma_key])
      if np.any(present_in_other & present_in_merged):
        warnings.warn(
          'Data for some stations and times exist in multiple '
          'datasets. Precedence is determined by the order the data '
          'files were specified in.')

      # copy every non-missing entry of *other* into *merged*
      merged[component][present_in_other] = other[component][present_in_other]
      merged[sigma_key][present_in_other] = other[sigma_key][present_in_other]

  # fall back to the default output name when none was given
  stem = 'merged' if output_stem is None else output_stem
  output_file = stem + '.h5'
  hdf5_from_dict(output_file,merged)
  logger.info('Merged data written to %s' % output_file)
Пример #3
0
def pygeons_autoclean(input_file,
                      network_model=('spwen12-se',),
                      network_params=(1.0,0.1,100.0),
                      station_model=('linear',),
                      station_params=(),
                      output_stem=None,
                      outlier_tol=4.0):
  '''
  Remove outliers with a data editing algorithm.

  The dataset in *input_file* is read, each of the east, north and
  vertical components is passed through *autoclean*, and the edited
  dataset is written to ``output_stem + '.h5'``.

  Parameters
  ----------
  input_file : str
    Name of the HDF5 file to read. Its 'time_exponent' must be 0 and
    its 'space_exponent' must be 1 (units of displacement).

  network_model, station_model : tuple of str, optional
    Model names forwarded unchanged to *autoclean*.

  network_params, station_params : tuple, optional
    Hyperparameters. They are converted with *_params_dict* and the
    entry for each component is forwarded to *autoclean*.

  output_stem : str, optional
    Output file name without the '.h5' extension. Defaults to the
    input file name, with its extension removed, plus '.autoclean'.

  outlier_tol : float, optional
    Forwarded to *autoclean* as *tol*.

  Raises
  ------
  ValueError
    If the dataset does not have units of displacement.
  '''
  logger.info('Running pygeons autoclean ...')
  data = dict_from_hdf5(input_file)
  if data['time_exponent'] != 0:
    raise ValueError('input dataset must have units of displacement')

  if data['space_exponent'] != 1:
    raise ValueError('input dataset must have units of displacement')

  # dictionary which will contain the edited data. dict.items() is used
  # because dict.iteritems() only exists in Python 2.
  out = dict((k,np.copy(v)) for k,v in data.items())

  # convert params to a dictionary of hyperparameters for each direction
  network_params = _params_dict(network_params)
  station_params = _params_dict(station_params)

  # make output file name
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.autoclean'

  output_file = output_stem + '.h5'

  # convert geodetic positions to cartesian
  bm = make_basemap(data['longitude'],data['latitude'])
  x,y = bm(data['longitude'],data['latitude'])
  xy = np.array([x,y]).T

  _log_autoclean(input_file,
                 network_model,network_params,
                 station_model,station_params,
                 outlier_tol,
                 output_file)

  # each component is edited independently of the others
  for component in ['east','north','vertical']:
    de,sde = autoclean(t=data['time'][:,None],
                       x=xy,
                       d=data[component],
                       sd=data[component+'_std_dev'],
                       network_model=network_model,
                       network_params=network_params[component],
                       station_model=station_model,
                       station_params=station_params[component],
                       tol=outlier_tol)
    out[component] = de
    out[component+'_std_dev'] = sde

  hdf5_from_dict(output_file,out)
  logger.info('Edited data written to %s' % output_file)
  return
Пример #4
0
def pygeons_fit(input_file,
                network_model=('spwen12-se',),
                network_params=(1.0,0.1,100.0),
                station_model=('linear',),
                station_params=(),
                output_stem=None):
  '''
  Condition the Gaussian process to the observations and evaluate the
  posterior at the observation points.

  The dataset in *input_file* is read, each of the east, north and
  vertical components is passed through *fit*, and the result is
  written to ``output_stem + '.h5'``.

  Parameters
  ----------
  input_file : str
    Name of the HDF5 file to read. Its 'time_exponent' must be 0 and
    its 'space_exponent' must be 1 (units of displacement).

  network_model, station_model : tuple of str, optional
    Model names forwarded unchanged to *fit*.

  network_params, station_params : tuple, optional
    Hyperparameters. They are converted with *_params_dict* and the
    entry for each component is forwarded to *fit*.

  output_stem : str, optional
    Output file name without the '.h5' extension. Defaults to the
    input file name, with its extension removed, plus '.fit'.

  Raises
  ------
  ValueError
    If the dataset does not have units of displacement.
  '''
  logger.info('Running pygeons fit ...')
  data = dict_from_hdf5(input_file)
  if data['time_exponent'] != 0:
    raise ValueError('input dataset must have units of displacement')

  if data['space_exponent'] != 1:
    raise ValueError('input dataset must have units of displacement')

  # create output dictionary. dict.items() is used because
  # dict.iteritems() only exists in Python 2.
  out = dict((k,np.copy(v)) for k,v in data.items())

  # convert params to a dictionary of hyperparameters for each direction
  network_params = _params_dict(network_params)
  station_params = _params_dict(station_params)

  # make output file name
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.fit'

  output_file = output_stem + '.h5'

  # convert geodetic positions to cartesian
  bm = make_basemap(data['longitude'],data['latitude'])
  x,y = bm(data['longitude'],data['latitude'])
  xy = np.array([x,y]).T

  _log_fit(input_file,
           network_model,network_params,
           station_model,station_params,
           output_file)

  # each component is fit independently of the others
  for component in ['east','north','vertical']:
    u,su = fit(t=data['time'][:,None],
               x=xy,
               d=data[component],
               sd=data[component+'_std_dev'],
               network_model=network_model,
               network_params=network_params[component],
               station_model=station_model,
               station_params=station_params[component])
    out[component] = u
    out[component+'_std_dev'] = su

  hdf5_from_dict(output_file,out)
  logger.info('Posterior fit written to %s' % output_file)
  return
Пример #5
0
def pygeons_toh5(input_text_file, file_type='csv', output_stem=None):
    '''
    Convert a text data file to an HDF5 file.

    Parameters
    ----------
    input_text_file : str
        Name of the text file to read.

    file_type : str, optional
        Forwarded to *dict_from_text* as *parser*.

    output_stem : str, optional
        Output file name without the '.h5' extension. Defaults to the
        input file name with its extension removed.
    '''
    logger.info('Running pygeons toh5 ...')
    parsed = dict_from_text(input_text_file, parser=file_type)
    # derive the output name from the input name unless one was given
    stem = (_remove_extension(input_text_file)
            if output_stem is None else output_stem)
    output_file = stem + '.h5'
    hdf5_from_dict(output_file, parsed)
    logger.info('Data written to %s' % output_file)
Пример #6
0
def pygeons_toh5(input_text_file,file_type='csv',output_stem=None):
  '''
  Convert a text data file to an HDF5 file.

  Parameters
  ----------
  input_text_file : str
    Name of the text file to read.

  file_type : str, optional
    Forwarded to *dict_from_text* as *parser*.

  output_stem : str, optional
    Output file name without the '.h5' extension. Defaults to the
    input file name with its extension removed.
  '''
  logger.info('Running pygeons toh5 ...')
  parsed = dict_from_text(input_text_file,parser=file_type)
  # derive the output name from the input name unless one was given
  stem = (_remove_extension(input_text_file)
          if output_stem is None else output_stem)
  output_file = stem + '.h5'
  hdf5_from_dict(output_file,parsed)
  logger.info('Data written to %s' % output_file)
Пример #7
0
def pygeons_clean(input_file,
                  resolution='i',
                  input_edits_file=None,
                  break_lons=None,
                  break_lats=None,
                  break_conn=None,
                  no_display=False,
                  output_stem=None,
                  **kwargs):
    '''
    Run the PyGeoNS Interactive Cleaner on a data file.

    The dataset is read from *input_file*, edits from
    *input_edits_file* (if any) are applied, the interactive viewer is
    opened unless *no_display* is set, and then both the cleaned
    dataset (``output_stem + '.h5'``) and the log of edits
    (``output_stem + '.txt'``) are written.

    Parameters
    ----------
    input_file : str
        Name of the HDF5 file to read.

    resolution : str, optional
        Basemap resolution, forwarded to *make_basemap*.

    input_edits_file : str, optional
        Name of a file containing edits which will automatically be
        applied before opening up the interactive viewer. Each
        non-blank line holds four whitespace-separated fields: either
        ``outliers <station> <start date> <stop date>`` or
        ``jump <station> <date> <integer delta>``, with dates written
        as YYYY-MM-DD. Edits for stations that are not in the dataset
        are skipped.

    break_lons, break_lats, break_conn : optional
        Accepted but not used by this function.

    no_display : bool, optional
        If True, the interactive viewer is not opened.

    output_stem : str, optional
        Stem of the output file names. Defaults to the input file
        name, with its extension removed, plus '.clean'.

    **kwargs :
        Forwarded to *InteractiveCleaner*.

    Raises
    ------
    ValueError
        If an edit is neither of type "outliers" nor "jump".
    '''
    logger.info('Running pygeons clean ...')
    data = dict_from_hdf5(input_file)
    # dict.items() is used because dict.iteritems() only exists in
    # Python 2.
    out = dict((k, np.copy(v)) for k, v in data.items())

    ts_fig, ts_ax = plt.subplots(3,
                                 1,
                                 sharex=True,
                                 num='Time Series View',
                                 facecolor='white')
    _setup_ts_ax(ts_ax)
    map_fig, map_ax = plt.subplots(num='Map View', facecolor='white')
    bm = make_basemap(data['longitude'],
                      data['latitude'],
                      resolution=resolution)
    _setup_map_ax(bm, map_ax)
    x, y = bm(data['longitude'], data['latitude'])
    pos = np.array([x, y]).T
    t = data['time']
    dates = [mjd_inv(ti, '%Y-%m-%d') for ti in t]
    units = _unit_string(data['space_exponent'], data['time_exponent'])
    # the data are scaled by *conv* for the cleaner and scaled back by
    # the same factor before being saved
    conv = 1.0 / unit_conversion(units, time='day', space='m')
    u = conv * data['east']
    v = conv * data['north']
    z = conv * data['vertical']
    su = conv * data['east_std_dev']
    sv = conv * data['north_std_dev']
    sz = conv * data['vertical_std_dev']
    ic = InteractiveCleaner(t,
                            pos,
                            u=u,
                            v=v,
                            z=z,
                            su=su,
                            sv=sv,
                            sz=sz,
                            map_ax=map_ax,
                            ts_ax=ts_ax,
                            time_labels=dates,
                            units=units,
                            station_labels=data['id'],
                            **kwargs)

    # make edits to the data set prior to displaying it
    if input_edits_file is not None:
        with open(input_edits_file, 'r') as fin:
            for line in fin:
                # ignore blank lines
                if line.isspace():
                    continue

                edit_type, sta, a, b = line.strip().split()
                # set the current station in *ic* to the station for this edit
                xidx, = (data['id'] == sta).nonzero()
                if len(xidx) == 0:
                    # continue because the station does not exist in this
                    # dataset
                    continue

                ic.xidx = xidx[0]
                if edit_type == 'outliers':
                    start_time = mjd(a, '%Y-%m-%d')
                    stop_time = mjd(b, '%Y-%m-%d')
                    ic.remove_outliers(start_time, stop_time)
                elif edit_type == 'jump':
                    jump_time = mjd(a, '%Y-%m-%d')
                    delta = int(b)
                    ic.remove_jump(jump_time, delta)
                else:
                    raise ValueError(
                        'edit type must be either "outliers" or "jump"')

    if not no_display:
        ic.update()
        ic.connect()

    # set output file name
    if output_stem is None:
        output_stem = _remove_extension(input_file) + '.clean'

    output_file = output_stem + '.h5'
    output_edits_file = output_stem + '.txt'

    # record every edit held in the cleaner's log, in the same format
    # that *input_edits_file* is read in
    with open(output_edits_file, 'w') as fout:
        for edit_type, xidx, a, b in ic.log:
            if edit_type == 'outliers':
                station = data['id'][xidx]
                start_date = mjd_inv(a, '%Y-%m-%d')
                stop_date = mjd_inv(b, '%Y-%m-%d')
                fout.write('outliers %s %s %s\n' %
                           (station, start_date, stop_date))
            elif edit_type == 'jump':
                station = data['id'][xidx]
                jump_date = mjd_inv(a, '%Y-%m-%d')
                fout.write('jump     %s %s %s\n' % (station, jump_date, b))
            else:
                raise ValueError(
                    'edit type must be either "outliers" or "jump"')

    logger.info('Edits saved to %s' % output_edits_file)
    clean_data = ic.get_data()
    out['east'] = clean_data[0] / conv
    out['north'] = clean_data[1] / conv
    out['vertical'] = clean_data[2] / conv
    out['east_std_dev'] = clean_data[3] / conv
    out['north_std_dev'] = clean_data[4] / conv
    out['vertical_std_dev'] = clean_data[5] / conv

    hdf5_from_dict(output_file, out)
    logger.info('Cleaned data written to %s' % output_file)
    logger.info('Edits written to %s' % output_edits_file)
    return
#!/usr/bin/env python
# this script removes high sigma values from an HDF5 file
import numpy as np
from pygeons.io.convert import dict_from_hdf5, hdf5_from_dict
import sys
# file to edit in place, given as the first command line argument
fname = sys.argv[1]
cutoff = 0.02  # 2 cm cutoff
data = dict_from_hdf5(fname)

components = ('north', 'east', 'vertical')

# flag every entry where any component has an uncertainty above the
# cutoff
exceeds = [data[name + '_std_dev'] > cutoff for name in components]
mask = exceeds[0] | exceeds[1] | exceeds[2]

# mark the flagged entries in all three components with a NaN value and
# an infinite uncertainty
for name in components:
    data[name][mask] = np.nan
    data[name + '_std_dev'][mask] = np.inf

# overwrite the input file with the masked data
hdf5_from_dict(fname, data)
Пример #9
0
def pygeons_crop(input_file,
                 start_date=None,
                 stop_date=None,
                 min_lat=-np.inf,
                 max_lat=np.inf,
                 min_lon=-np.inf,
                 max_lon=np.inf,
                 stations=None,
                 output_stem=None):
    '''
    Sets the time span of the data set to be between *start_date* and
    *stop_date*. Sets the stations to be within the latitude and
    longitude bounds. The cropped data set is written to
    ``output_stem + '.h5'``.

    Parameters
    ----------
    input_file : str
        Name of the HDF5 file to read.

    start_date : str, optional
        Start date of the output data set in YYYY-MM-DD. Times on the
        start date are kept. Defaults to the earliest time in the data
        set.

    stop_date : str, optional
        Stop date of the output data set in YYYY-MM-DD. Times on the
        stop date are kept. Defaults to the latest time in the data
        set.

    min_lon, max_lon, min_lat, max_lat : float, optional
        Spatial bounds on the output data set. The bounds are
        exclusive: a station lying exactly on a bound is removed.

    stations : str list, optional
        List of stations to be removed from the data set. This is in
        addition to the stations removed by the lon/lat bounds.

    output_stem : str, optional
        Output file name without the '.h5' extension. Defaults to the
        input file name, with its extension removed, plus '.crop'.
    '''
    logger.info('Running pygeons crop ...')
    data = dict_from_hdf5(input_file)
    # dict.items() is used because dict.iteritems() only exists in
    # Python 2.
    out = dict((k, np.copy(v)) for k, v in data.items())

    if start_date is None:
        start_date = mjd.mjd_inv(data['time'].min(), '%Y-%m-%d')

    if stop_date is None:
        stop_date = mjd.mjd_inv(data['time'].max(), '%Y-%m-%d')

    if stations is None:
        stations = []

    # remove times that are not within the bounds of *start_date* and
    # *stop_date*
    start_time = int(mjd.mjd(start_date, '%Y-%m-%d'))
    stop_time = int(mjd.mjd(stop_date, '%Y-%m-%d'))
    time_mask = ((data['time'] >= start_time) & (data['time'] <= stop_time))
    out['time'] = out['time'][time_mask]
    for component in ['east', 'north', 'vertical']:
        # the first axis of the data arrays is indexed by time
        out[component] = out[component][time_mask, :]
        out[component + '_std_dev'] = out[component + '_std_dev'][time_mask, :]

    # find stations that are within the bounds
    in_bounds = ((data['longitude'] > min_lon) & (data['longitude'] < max_lon)
                 & (data['latitude'] > min_lat) & (data['latitude'] < max_lat))
    # find stations that are in the list of stations to be removed. The
    # dtype is given explicitly because an empty list would otherwise
    # become a float array, which cannot be inverted with `~`.
    in_list = np.array([i in stations for i in data['id']], dtype=bool)
    # keep stations that are in bounds and not in the list
    station_idx, = (in_bounds & ~in_list).nonzero()

    out['id'] = out['id'][station_idx]
    out['longitude'] = out['longitude'][station_idx]
    out['latitude'] = out['latitude'][station_idx]
    for component in ['east', 'north', 'vertical']:
        # the second axis of the data arrays is indexed by station
        out[component] = out[component][:, station_idx]
        out[component + '_std_dev'] = out[component + '_std_dev'][:, station_idx]

    # set output file name
    if output_stem is None:
        output_stem = _remove_extension(input_file) + '.crop'

    output_file = output_stem + '.h5'
    hdf5_from_dict(output_file, out)
    logger.info('Cropped data written to %s' % output_file)
    return
Пример #10
0
def pygeons_strain(input_file,
                   network_prior_model=('spwen12-se',),
                   network_prior_params=(1.0,0.1,100.0),
                   network_noise_model=(),
                   network_noise_params=(),
                   station_noise_model=('linear',),
                   station_noise_params=(),
                   start_date=None,stop_date=None,
                   positions=None,positions_file=None,
                   rate=True,vertical=True,covariance=False,
                   output_stem=None):
  '''
  Calculate deformation gradients (strain) from a displacement dataset.

  Each of the east, north and vertical components is passed through
  *strain*. The x derivatives are written to
  ``output_stem + '.dudx.h5'`` and the y derivatives to
  ``output_stem + '.dudy.h5'``.

  Parameters
  ----------
  input_file : str
    Name of the HDF5 file to read. Its 'time_exponent' must be 0 and
    its 'space_exponent' must be 1 (units of displacement).

  network_prior_model, network_noise_model, station_noise_model : tuple of str, optional
    Model names forwarded unchanged to *strain*.

  network_prior_params, network_noise_params, station_noise_params : tuple, optional
    Hyperparameters. They are converted with *_params_dict* and the
    entry for each component is forwarded to *strain*.

  start_date, stop_date : str, optional
    First and last output dates in YYYY-MM-DD. Default to the earliest
    and latest times in the dataset.

  positions : sequence of str, optional
    Flat sequence of (ID, longitude, latitude) triples giving output
    positions.

  positions_file : str, optional
    Name of a text file with three columns (ID, longitude, latitude)
    giving output positions. When both *positions_file* and
    *positions* are given, the positions from the file come first.
    When neither is given, the positions in the dataset are used.

  rate : bool, optional
    Forwarded to *strain*. The output 'time_exponent' is set to
    ``-int(rate)``.

  vertical : bool, optional
    If False, the vertical component is not passed to *strain* and
    zeros are stored for it instead.

  covariance : bool, optional
    Forwarded to *strain*. If True, covariances are also stored under
    the '<component>_covariance' keys.

  output_stem : str, optional
    Stem of the output file names. Defaults to the input file name,
    with its extension removed, plus '.strain'.

  Raises
  ------
  ValueError
    If the dataset does not have units of displacement, or if the
    positions file does not have exactly three columns.
  '''
  logger.info('Running pygeons strain ...')
  data = dict_from_hdf5(input_file)
  if data['time_exponent'] != 0:
    raise ValueError('input dataset must have units of displacement')

  if data['space_exponent'] != 1:
    raise ValueError('input dataset must have units of displacement')

  # dict.items() is used because dict.iteritems() only exists in
  # Python 2.
  out_dx = dict((k,np.copy(v)) for k,v in data.items())
  out_dy = dict((k,np.copy(v)) for k,v in data.items())

  # convert params to a dictionary of hyperparameters for each direction
  network_prior_params = _params_dict(network_prior_params)
  network_noise_params = _params_dict(network_noise_params)
  station_noise_params = _params_dict(station_noise_params)

  # convert geodetic input positions to cartesian
  bm = make_basemap(data['longitude'],data['latitude'])
  x,y = bm(data['longitude'],data['latitude'])
  xy = np.array([x,y]).T

  # set output positions
  if positions is None and positions_file is None:
    # no output positions were specified so return the solution at the
    # input data positions
    output_id = np.array(data['id'],copy=True)
    output_lon = np.array(data['longitude'],copy=True)
    output_lat = np.array(data['latitude'],copy=True)

  else:
    output_id = np.zeros((0,),dtype=str)
    output_lon = np.zeros((0,),dtype=float)
    output_lat = np.zeros((0,),dtype=float)
    if positions_file is not None:
      # if positions file was specified
      pos = np.loadtxt(positions_file,dtype=str,ndmin=2)
      if pos.shape[1] != 3:
        raise ValueError(
          'positions file must contain a column for IDs, longitudes, '
          'and latitudes')

      output_id = np.hstack((output_id,pos[:,0]))
      output_lon = np.hstack((output_lon,pos[:,1].astype(float)))
      output_lat = np.hstack((output_lat,pos[:,2].astype(float)))

    if positions is not None:
      # if positions were specified via the command line
      pos = np.array(positions,dtype=str).reshape((-1,3))
      output_id = np.hstack((output_id,pos[:,0]))
      output_lon = np.hstack((output_lon,pos[:,1].astype(float)))
      output_lat = np.hstack((output_lat,pos[:,2].astype(float)))

  # convert geodetic output positions to cartesian
  output_x,output_y = bm(output_lon,output_lat)
  output_xy = np.array([output_x,output_y]).T

  # set output times
  if start_date is None:
    start_date = mjd_inv(np.min(data['time']),'%Y-%m-%d')

  if stop_date is None:
    stop_date = mjd_inv(np.max(data['time']),'%Y-%m-%d')

  start_time = mjd(start_date,'%Y-%m-%d')
  stop_time = mjd(stop_date,'%Y-%m-%d')
  # unit steps from the start time up to and including the stop time
  output_time = np.arange(start_time,stop_time+1)

  # set output file names
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.strain'

  output_dx_file = output_stem + '.dudx.h5'
  output_dy_file = output_stem + '.dudy.h5'

  _log_strain(input_file,
              network_prior_model,network_prior_params,
              network_noise_model,network_noise_params,
              station_noise_model,station_noise_params,
              start_date,stop_date,output_id,rate,vertical,
              covariance,output_dx_file,output_dy_file)

  for component in ['east','north','vertical']:
    if component == 'vertical' and not vertical:
      logger.debug('Not computing vertical deformation gradients')
      # do not compute the deformation gradients for vertical. Just
      # return zeros.
      dx = np.zeros((output_time.shape[0],output_xy.shape[0]))
      sdx = np.zeros((output_time.shape[0],output_xy.shape[0]))
      dy = np.zeros((output_time.shape[0],output_xy.shape[0]))
      sdy = np.zeros((output_time.shape[0],output_xy.shape[0]))
      if covariance:
        # if covariance is True then create an empty array of
        # covariances
        cdx = np.zeros((output_time.shape[0],output_xy.shape[0],
                        output_time.shape[0],output_xy.shape[0]))
        cdy = np.zeros((output_time.shape[0],output_xy.shape[0],
                        output_time.shape[0],output_xy.shape[0]))
        soln = (dx,sdx,cdx,dy,sdy,cdy)

      else:
        soln = (dx,sdx,dy,sdy)

    else:
      soln = strain(t=data['time'][:,None],
                    x=xy,
                    d=data[component],
                    sd=data[component+'_std_dev'],
                    network_prior_model=network_prior_model,
                    network_prior_params=network_prior_params[component],
                    network_noise_model=network_noise_model,
                    network_noise_params=network_noise_params[component],
                    station_noise_model=station_noise_model,
                    station_noise_params=station_noise_params[component],
                    out_t=output_time[:,None],
                    out_x=output_xy,
                    rate=rate,
                    covariance=covariance)

    if covariance:
      # soln contains six entries when covariance is True
      dx,sdx,cdx,dy,sdy,cdy = soln
      out_dx[component] = dx
      out_dx[component+'_std_dev'] = sdx
      out_dx[component+'_covariance'] = cdx
      out_dy[component] = dy
      out_dy[component+'_std_dev'] = sdy
      out_dy[component+'_covariance'] = cdy

    else:
      # soln contains four entries when covariance is False
      dx,sdx,dy,sdy = soln
      out_dx[component] = dx
      out_dx[component+'_std_dev'] = sdx
      out_dy[component] = dy
      out_dy[component+'_std_dev'] = sdy

  # replace the metadata copied from the input with the output times,
  # positions and units
  out_dx['time'] = output_time
  out_dx['longitude'] = output_lon
  out_dx['latitude'] = output_lat
  out_dx['id'] = output_id
  out_dx['time_exponent'] = -int(rate)
  out_dx['space_exponent'] = 0

  out_dy['time'] = output_time
  out_dy['longitude'] = output_lon
  out_dy['latitude'] = output_lat
  out_dy['id'] = output_id
  out_dy['time_exponent'] = -int(rate)
  out_dy['space_exponent'] = 0

  hdf5_from_dict(output_dx_file,out_dx)
  hdf5_from_dict(output_dy_file,out_dy)
  if rate:
    logger.info('Posterior velocity gradients written to %s and %s' % (output_dx_file,output_dy_file))

  else:
    logger.info('Posterior displacement gradients written to %s and %s' % (output_dx_file,output_dy_file))

  return
Пример #11
0
def pygeons_clean(input_file,resolution='i',
                  input_edits_file=None,
                  break_lons=None,break_lats=None,
                  break_conn=None,no_display=False,
                  output_stem=None,**kwargs):
  '''
  Run the PyGeoNS Interactive Cleaner on a data file.

  The dataset is read from *input_file*, edits from *input_edits_file*
  (if any) are applied, the interactive viewer is opened unless
  *no_display* is set, and then both the cleaned dataset
  (``output_stem + '.h5'``) and the log of edits
  (``output_stem + '.txt'``) are written.

  Parameters
  ----------
  input_file : str
    Name of the HDF5 file to read.

  resolution : str, optional
    Basemap resolution, forwarded to *make_basemap*.

  input_edits_file : str, optional
    Name of a file containing edits which will automatically be
    applied before opening up the interactive viewer. Each non-blank
    line holds four whitespace-separated fields: either
    ``outliers <station> <start date> <stop date>`` or
    ``jump <station> <date> <integer delta>``, with dates written as
    YYYY-MM-DD. Edits for stations that are not in the dataset are
    skipped.

  break_lons, break_lats, break_conn : optional
    Accepted but not used by this function.

  no_display : bool, optional
    If True, the interactive viewer is not opened.

  output_stem : str, optional
    Stem of the output file names. Defaults to the input file name,
    with its extension removed, plus '.clean'.

  **kwargs :
    Forwarded to *InteractiveCleaner*.

  Raises
  ------
  ValueError
    If an edit is neither of type "outliers" nor "jump".
  '''
  logger.info('Running pygeons clean ...')
  data = dict_from_hdf5(input_file)
  # dict.items() is used because dict.iteritems() only exists in
  # Python 2.
  out = dict((k,np.copy(v)) for k,v in data.items())

  ts_fig,ts_ax = plt.subplots(3,1,sharex=True,num='Time Series View',facecolor='white')
  _setup_ts_ax(ts_ax)
  map_fig,map_ax = plt.subplots(num='Map View',facecolor='white')
  bm = make_basemap(data['longitude'],data['latitude'],resolution=resolution)
  _setup_map_ax(bm,map_ax)
  x,y = bm(data['longitude'],data['latitude'])
  pos = np.array([x,y]).T
  t = data['time']
  dates = [mjd_inv(ti,'%Y-%m-%d') for ti in t]
  units = _unit_string(data['space_exponent'],data['time_exponent'])
  # the data are scaled by *conv* for the cleaner and scaled back by the
  # same factor before being saved
  conv = 1.0/unit_conversion(units,time='day',space='m')
  u = conv*data['east']
  v = conv*data['north']
  z = conv*data['vertical']
  su = conv*data['east_std_dev']
  sv = conv*data['north_std_dev']
  sz = conv*data['vertical_std_dev']
  ic = InteractiveCleaner(
         t,pos,u=u,v=v,z=z,su=su,sv=sv,sz=sz,
         map_ax=map_ax,ts_ax=ts_ax,
         time_labels=dates,
         units=units,
         station_labels=data['id'],
         **kwargs)

  # make edits to the data set prior to displaying it
  if input_edits_file is not None:
    with open(input_edits_file,'r') as fin:
      for line in fin:
        # ignore blank lines
        if line.isspace():
          continue

        edit_type,sta,a,b = line.strip().split()
        # set the current station in *ic* to the station for this edit
        xidx, = (data['id'] == sta).nonzero()
        if len(xidx) == 0:
          # continue because the station does not exist in this
          # dataset
          continue

        ic.xidx = xidx[0]
        if edit_type == 'outliers':
          start_time = mjd(a,'%Y-%m-%d')
          stop_time = mjd(b,'%Y-%m-%d')
          ic.remove_outliers(start_time,stop_time)
        elif edit_type == 'jump':
          jump_time = mjd(a,'%Y-%m-%d')
          delta = int(b)
          ic.remove_jump(jump_time,delta)
        else:
          raise ValueError('edit type must be either "outliers" or "jump"')

  if not no_display:
    ic.update()
    ic.connect()

  # set output file name
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.clean'

  output_file = output_stem + '.h5'
  output_edits_file = output_stem + '.txt'

  # record every edit held in the cleaner's log, in the same format that
  # *input_edits_file* is read in
  with open(output_edits_file,'w') as fout:
    for edit_type,xidx,a,b in ic.log:
      if edit_type == 'outliers':
        station = data['id'][xidx]
        start_date = mjd_inv(a,'%Y-%m-%d')
        stop_date = mjd_inv(b,'%Y-%m-%d')
        fout.write('outliers %s %s %s\n' % (station,start_date,stop_date))
      elif edit_type == 'jump':
        station = data['id'][xidx]
        jump_date = mjd_inv(a,'%Y-%m-%d')
        fout.write('jump     %s %s %s\n' % (station,jump_date,b))
      else:
        raise ValueError('edit type must be either "outliers" or "jump"')

  logger.info('Edits saved to %s' % output_edits_file)
  clean_data  = ic.get_data()
  out['east'] = clean_data[0]/conv
  out['north'] = clean_data[1]/conv
  out['vertical'] = clean_data[2]/conv
  out['east_std_dev'] = clean_data[3]/conv
  out['north_std_dev'] = clean_data[4]/conv
  out['vertical_std_dev'] = clean_data[5]/conv

  hdf5_from_dict(output_file,out)
  logger.info('Cleaned data written to %s' % output_file)
  logger.info('Edits written to %s' % output_edits_file)
  return
Пример #12
0
def pygeons_crop(input_file,start_date=None,stop_date=None,
                 min_lat=-np.inf,max_lat=np.inf,
                 min_lon=-np.inf,max_lon=np.inf,
                 stations=None,output_stem=None):
  '''
  Sets the time span of the data set to be between *start_date* and
  *stop_date*. Sets the stations to be within the latitude and
  longitude bounds. The cropped data set is written to
  ``output_stem + '.h5'``.

  Parameters
  ----------
  input_file : str
    Name of the HDF5 file to read.

  start_date : str, optional
    Start date of the output data set in YYYY-MM-DD. Times on the
    start date are kept. Defaults to the earliest time in the data
    set.

  stop_date : str, optional
    Stop date of the output data set in YYYY-MM-DD. Times on the stop
    date are kept. Defaults to the latest time in the data set.

  min_lon, max_lon, min_lat, max_lat : float, optional
    Spatial bounds on the output data set. The bounds are exclusive:
    a station lying exactly on a bound is removed.

  stations : str list, optional
    List of stations to be removed from the data set. This is in
    addition to the stations removed by the lon/lat bounds.

  output_stem : str, optional
    Output file name without the '.h5' extension. Defaults to the
    input file name, with its extension removed, plus '.crop'.
  '''
  logger.info('Running pygeons crop ...')
  data = dict_from_hdf5(input_file)
  # dict.items() is used because dict.iteritems() only exists in
  # Python 2.
  out = dict((k,np.copy(v)) for k,v in data.items())

  if start_date is None:
    start_date = mjd.mjd_inv(data['time'].min(),'%Y-%m-%d')

  if stop_date is None:
    stop_date = mjd.mjd_inv(data['time'].max(),'%Y-%m-%d')

  if stations is None:
    stations = []

  # remove times that are not within the bounds of *start_date* and
  # *stop_date*
  start_time = int(mjd.mjd(start_date,'%Y-%m-%d'))
  stop_time = int(mjd.mjd(stop_date,'%Y-%m-%d'))
  time_mask = ((data['time'] >= start_time) &
               (data['time'] <= stop_time))
  out['time'] = out['time'][time_mask]
  for component in ['east','north','vertical']:
    # the first axis of the data arrays is indexed by time
    out[component] = out[component][time_mask,:]
    out[component + '_std_dev'] = out[component + '_std_dev'][time_mask,:]

  # find stations that are within the bounds
  in_bounds = ((data['longitude'] > min_lon) &
               (data['longitude'] < max_lon) &
               (data['latitude'] > min_lat) &
               (data['latitude'] < max_lat))
  # find stations that are in the list of stations to be removed. The
  # dtype is given explicitly because an empty list would otherwise
  # become a float array, which cannot be inverted with `~`.
  in_list = np.array([i in stations for i in data['id']],dtype=bool)
  # keep stations that are in bounds and not in the list
  station_idx, = (in_bounds & ~in_list).nonzero()

  out['id'] = out['id'][station_idx]
  out['longitude'] = out['longitude'][station_idx]
  out['latitude'] = out['latitude'][station_idx]
  for component in ['east','north','vertical']:
    # the second axis of the data arrays is indexed by station
    out[component] = out[component][:,station_idx]
    out[component + '_std_dev'] = out[component + '_std_dev'][:,station_idx]

  # set output file name
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.crop'

  output_file = output_stem + '.h5'
  hdf5_from_dict(output_file,out)
  logger.info('Cropped data written to %s' % output_file)
  return