Exemplo n.º 1
0
 def coord_formatter(x,y):
   '''
   Format the cursor position for display. *x* is a modified Julian
   date and is shown both numerically and as a YYYY-MM-DD string;
   *y* is accepted but not used.
   '''
   date_str = mjd_inv(x,'%Y-%m-%d')
   return 'time : %g  (date : %s)' % (x,date_str)
Exemplo n.º 2
0
def pygeons_info(input_file):
    '''
    Print a summary of the metadata stored in a PyGeoNS HDF5 data
    file: units, station and time counts, observation count, time and
    spatial ranges, and the station names.

    Parameters
    ----------
    input_file : str
      name of the HDF5 data file
    '''
    logger.info('Running pygeons info ...')
    data = dict_from_hdf5(input_file)
    units = _unit_string(data['space_exponent'],
                         data['time_exponent'])
    # an observation is any entry whose uncertainty is finite
    observations = (np.sum(~np.isinf(data['east_std_dev'])) +
                    np.sum(~np.isinf(data['north_std_dev'])) +
                    np.sum(~np.isinf(data['vertical_std_dev'])))
    time_range = '%s, %s' % (mjd.mjd_inv(data['time'][0], '%Y-%m-%d'),
                             mjd.mjd_inv(data['time'][-1], '%Y-%m-%d'))
    lon_range = '%s, %s' % (np.min(data['longitude']),
                            np.max(data['longitude']))
    lat_range = '%s, %s' % (np.min(data['latitude']),
                            np.max(data['latitude']))
    # break the station names into rows of at most seven names each
    names = list(data['id'])
    name_rows = [', '.join(names[i:i + 7]) for i in range(0, len(names), 7)]

    msg = '\n'
    msg += '------------------ PYGEONS DATA INFORMATION ------------------\n\n'
    msg += 'file : %s\n' % input_file
    msg += 'units : %s\n' % units
    msg += 'stations : %s\n' % len(data['id'])
    msg += 'times : %s\n' % len(data['time'])
    msg += 'observations : %s\n' % observations
    msg += 'time range : %s\n' % time_range
    msg += 'longitude range : %s\n' % lon_range
    msg += 'latitude range : %s\n' % lat_range
    msg += 'station names : %s\n' % name_rows[0]
    for row in name_rows[1:]:
        msg += '                %s\n' % row

    msg += '\n'
    msg += '--------------------------------------------------------------'
    print(msg)
    return
Exemplo n.º 3
0
def pygeons_info(input_file):
  '''
  Print a summary of the metadata stored in a PyGeoNS HDF5 data file.
  '''
  logger.info('Running pygeons info ...')
  data_dict = dict_from_hdf5(input_file)
  units = _unit_string(data_dict['space_exponent'],
                       data_dict['time_exponent'])
  # count every entry with a finite uncertainty as one observation
  n_obs = sum(np.sum(~np.isinf(data_dict[key])) for key in
              ('east_std_dev', 'north_std_dev', 'vertical_std_dev'))
  time_range = '%s, %s' % (mjd.mjd_inv(data_dict['time'][0],'%Y-%m-%d'),
                           mjd.mjd_inv(data_dict['time'][-1],'%Y-%m-%d'))
  lon = data_dict['longitude']
  lat = data_dict['latitude']
  lon_range = '%s, %s' % (np.min(lon), np.max(lon))
  lat_range = '%s, %s' % (np.min(lat), np.max(lat))
  # station names are displayed up to seven per row
  ids = list(data_dict['id'])
  groups = [', '.join(ids[j:j + 7]) for j in range(0, len(ids), 7)]

  rows = ['']
  rows.append('------------------ PYGEONS DATA INFORMATION ------------------')
  rows.append('')
  rows.append('file : %s' % input_file)
  rows.append('units : %s' % units)
  rows.append('stations : %s' % len(ids))
  rows.append('times : %s' % len(data_dict['time']))
  rows.append('observations : %s' % n_obs)
  rows.append('time range : %s' % time_range)
  rows.append('longitude range : %s' % lon_range)
  rows.append('latitude range : %s' % lat_range)
  rows.append('station names : %s' % groups[0])
  rows.extend('                %s' % g for g in groups[1:])
  rows.append('')
  rows.append('--------------------------------------------------------------')
  print('\n'.join(rows))
  return
Exemplo n.º 4
0
def pygeons_strain_view(xdiff_file,ydiff_file,map_resolution='i',**kwargs):
  '''
  Runs the PyGeoNS Interactive Strain Viewer.

  Parameters
  ----------
    xdiff_file : str
      x deformation gradient dataset (must have no spatial units)

    ydiff_file : str
      y deformation gradient dataset (must have no spatial units)

    map_resolution : str
      basemap resolution

    **kwargs :
      gets passed to pygeons.strain.view

  '''
  logger.info('Running pygeons strain-view ...')
  data_dx = dict_from_hdf5(xdiff_file)
  data_dy = dict_from_hdf5(ydiff_file)
  data_dx,data_dy = _common_context([data_dx,data_dy])

  # both datasets must be dimensionless in space
  if (data_dx['space_exponent'] != 0) or (data_dy['space_exponent'] != 0):
    raise ValueError('The input datasets cannot have spatial units')

  times = data_dx['time']
  station_ids = data_dx['id']
  lon = data_dx['longitude']
  lat = data_dx['latitude']
  date_labels = [mjd_inv(ti,'%Y-%m-%d') for ti in times]
  units = _unit_string(data_dx['space_exponent'],
                       data_dx['time_exponent'])
  # factor converting days and meters to the units in *units*
  conv = 1.0/unit_conversion(units,time='day',space='m')
  # strain components and their uncertainties
  exx = conv*data_dx['east']
  eyy = conv*data_dy['north']
  exy = 0.5*conv*(data_dx['north'] + data_dy['east'])
  sxx = conv*data_dx['east_std_dev']
  syy = conv*data_dy['north_std_dev']
  sxy = 0.5*conv*np.sqrt(data_dx['north_std_dev']**2 +
                         data_dy['east_std_dev']**2)

  ts_fig,ts_ax = plt.subplots(3,1,sharex=True,num='Time Series View',
                              facecolor='white')
  _setup_ts_ax(ts_ax)
  map_fig,map_ax = plt.subplots(num='Map View',facecolor='white')
  bm = make_basemap(lon,lat,resolution=map_resolution)
  _setup_map_ax(bm,map_ax)
  x,y = bm(lon,lat)
  pos = np.array([x,y]).T
  interactive_strain_viewer(
    times,pos,exx,eyy,exy,sxx=sxx,syy=syy,sxy=sxy,
    map_ax=map_ax,ts_ax=ts_ax,time_labels=date_labels,
    station_labels=station_ids,units=units,**kwargs)

  return
Exemplo n.º 5
0
def pygeons_vector_view(input_files,map_resolution='i',**kwargs):
  '''
  Runs the PyGeoNS interactive vector viewer.

  Parameters
  ----------
    input_files : (N,) str list
      names of the HDF5 data files being plotted

    map_resolution : str
      basemap resolution

    **kwargs :
      gets passed to pygeons.plot.view.interactive_view

  '''
  logger.info('Running pygeons vector-view ...')
  data_list = _common_context([dict_from_hdf5(f) for f in input_files])

  # default the dataset labels to the input file names
  dataset_labels = kwargs.pop('dataset_labels',input_files)

  first = data_list[0]
  times = first['time']
  lon = first['longitude']
  lat = first['latitude']
  station_ids = first['id']
  date_labels = [mjd_inv(ti,'%Y-%m-%d') for ti in times]
  units = _unit_string(first['space_exponent'],
                       first['time_exponent'])
  # factor converting days and meters to the units in *units*
  conv = 1.0/unit_conversion(units,time='day',space='m')
  u = [conv*d['east'] for d in data_list]
  v = [conv*d['north'] for d in data_list]
  z = [conv*d['vertical'] for d in data_list]
  su = [conv*d['east_std_dev'] for d in data_list]
  sv = [conv*d['north_std_dev'] for d in data_list]
  sz = [conv*d['vertical_std_dev'] for d in data_list]
  ts_fig,ts_ax = plt.subplots(3,1,sharex=True,num='Time Series View',
                              facecolor='white')
  _setup_ts_ax(ts_ax)
  map_fig,map_ax = plt.subplots(num='Map View',facecolor='white')
  bm = make_basemap(lon,lat,resolution=map_resolution)
  _setup_map_ax(bm,map_ax)
  x,y = bm(lon,lat)
  pos = np.array([x,y]).T
  interactive_vector_viewer(
    times,pos,u=u,v=v,z=z,su=su,sv=sv,sz=sz,
    ts_ax=ts_ax,map_ax=map_ax,
    dataset_labels=dataset_labels,
    station_labels=station_ids,time_labels=date_labels,
    units=units,**kwargs)

  return
Exemplo n.º 6
0
 def xtick_formatter(x,p):
   '''
   Format tick value *x* (a modified Julian date) as a YYYY-MM-DD
   string; *p* is accepted but not used. Values that cannot be
   converted to a date yield an empty string.
   '''
   try:
     return mjd_inv(x,'%Y-%m-%d')
   except (ValueError,OverflowError):
     # x is outside the representable date range
     return ''
Exemplo n.º 7
0
def _write_csv(data):
  '''
  Build the csv file content for a single station.

  Returns
  -------
    out : str
      header lines followed by one data row per observation time
  '''
  time = data['time']
  # header block describing the station and the data units
  parts = ['4-character id, %s\n' % data['id'],
           'begin date, %s\n' % mjd_inv(time[0],'%Y-%m-%d'),
           'end date, %s\n' % mjd_inv(time[-1],'%Y-%m-%d'),
           'longitude, %s E\n' % data['longitude'],
           'latitude, %s N\n' % data['latitude'],
           ('units, meters**%s days**%s\n' %
            (data['space_exponent'],data['time_exponent'])),
           ('date, north, east, vertical, north std. deviation, '
            'east std. deviation, vertical std. deviation\n')]
  # one row of displacements and uncertainties per observation time
  for i in range(len(data['time'])):
    parts.append('%s, %e, %e, %e, %e, %e, %e\n' %
                 (mjd_inv(time[i],'%Y-%m-%d'),data['north'][i],
                  data['east'][i],data['vertical'][i],
                  data['north_std_dev'][i],data['east_std_dev'][i],
                  data['vertical_std_dev'][i]))

  # join once rather than repeatedly concatenating strings
  return ''.join(parts)
Exemplo n.º 8
0
def check_unique_dates(data):
  '''
  Make sure that each date in *data*['time'] is unique.

  Raises
  ------
    DataError
      If any date appears more than once; the message lists the
      duplicated dates in YYYY-MM-DD form.
  '''
  from collections import Counter
  # count each day in one pass. The previous implementation rescanned
  # the full time array once per unique day (O(n*m)) and relied on
  # numpy broadcasting; Counter is O(n) and works for any sequence.
  counts = Counter(data['time'])
  repeated = [day for day, n in counts.items() if n > 1]
  if repeated:
    duplicates = ', '.join(mjd_inv(day,'%Y-%m-%d') for day in repeated)
    raise DataError(
      'Dataset contains the following duplicate dates : %s '
      % duplicates)
Exemplo n.º 9
0
def pygeons_clean(input_file,
                  resolution='i',
                  input_edits_file=None,
                  break_lons=None,
                  break_lats=None,
                  break_conn=None,
                  no_display=False,
                  output_stem=None,
                  **kwargs):
    '''
    Runs the PyGeoNS Interactive Cleaner, then writes the cleaned
    dataset (<stem>.h5) and a log of all edits (<stem>.txt).

    Parameters
    ----------
    input_file : str
      name of the input HDF5 data file

    resolution : str
      basemap resolution

    input_edits_file : str
      name of the file containing edits which will automatically be
      applied before opening up the interactive viewer

    no_display : bool
      if True, the interactive viewer is not opened

    output_stem : str
      stem for the output file names; defaults to the input file
      name with a '.clean' suffix

    **kwargs :
      gets passed to InteractiveCleaner

    Notes
    -----
    *break_lons*, *break_lats*, and *break_conn* are accepted but
    not used in this function.
    '''
    logger.info('Running pygeons clean ...')
    data = dict_from_hdf5(input_file)
    # items() instead of the python-2-only iteritems()
    out = dict((k, np.copy(v)) for k, v in data.items())

    ts_fig, ts_ax = plt.subplots(3,
                                 1,
                                 sharex=True,
                                 num='Time Series View',
                                 facecolor='white')
    _setup_ts_ax(ts_ax)
    map_fig, map_ax = plt.subplots(num='Map View', facecolor='white')
    bm = make_basemap(data['longitude'],
                      data['latitude'],
                      resolution=resolution)
    _setup_map_ax(bm, map_ax)
    x, y = bm(data['longitude'], data['latitude'])
    pos = np.array([x, y]).T
    t = data['time']
    dates = [mjd_inv(ti, '%Y-%m-%d') for ti in t]
    units = _unit_string(data['space_exponent'], data['time_exponent'])
    # factor converting days and meters to the units in *units*
    conv = 1.0 / unit_conversion(units, time='day', space='m')
    u = conv * data['east']
    v = conv * data['north']
    z = conv * data['vertical']
    su = conv * data['east_std_dev']
    sv = conv * data['north_std_dev']
    sz = conv * data['vertical_std_dev']
    ic = InteractiveCleaner(t,
                            pos,
                            u=u,
                            v=v,
                            z=z,
                            su=su,
                            sv=sv,
                            sz=sz,
                            map_ax=map_ax,
                            ts_ax=ts_ax,
                            time_labels=dates,
                            units=units,
                            station_labels=data['id'],
                            **kwargs)

    # make edits to the data set prior to displaying it
    if input_edits_file is not None:
        with open(input_edits_file, 'r') as fin:
            for line in fin:
                # ignore blank lines
                if line.isspace():
                    continue

                # *edit_type* avoids shadowing the builtin type()
                edit_type, sta, a, b = line.strip().split()
                # set the current station in *ic* to the station for this edit
                xidx, = (data['id'] == sta).nonzero()
                if len(xidx) == 0:
                    # continue because the station does not exist in this
                    # dataset
                    continue

                ic.xidx = xidx[0]
                if edit_type == 'outliers':
                    start_time = mjd(a, '%Y-%m-%d')
                    stop_time = mjd(b, '%Y-%m-%d')
                    ic.remove_outliers(start_time, stop_time)
                elif edit_type == 'jump':
                    jump_time = mjd(a, '%Y-%m-%d')
                    delta = int(b)
                    ic.remove_jump(jump_time, delta)
                else:
                    raise ValueError(
                        'edit type must be either "outliers" or "jump"')

    if not no_display:
        ic.update()
        ic.connect()

    # set output file name
    if output_stem is None:
        output_stem = _remove_extension(input_file) + '.clean'

    output_file = output_stem + '.h5'
    output_edits_file = output_stem + '.txt'

    with open(output_edits_file, 'w') as fout:
        for entry in ic.log:
            edit_type, xidx, a, b = entry
            if edit_type == 'outliers':
                station = data['id'][xidx]
                start_date = mjd_inv(a, '%Y-%m-%d')
                stop_date = mjd_inv(b, '%Y-%m-%d')
                fout.write('outliers %s %s %s\n' %
                           (station, start_date, stop_date))
            elif edit_type == 'jump':
                station = data['id'][xidx]
                jump_date = mjd_inv(a, '%Y-%m-%d')
                fout.write('jump     %s %s %s\n' % (station, jump_date, b))
            else:
                raise ValueError(
                    'edit type must be either "outliers" or "jump"')

    logger.info('Edits saved to %s' % output_edits_file)
    clean_data = ic.get_data()
    # undo the unit conversion before saving
    out['east'] = clean_data[0] / conv
    out['north'] = clean_data[1] / conv
    out['vertical'] = clean_data[2] / conv
    out['east_std_dev'] = clean_data[3] / conv
    out['north_std_dev'] = clean_data[4] / conv
    out['vertical_std_dev'] = clean_data[5] / conv

    hdf5_from_dict(output_file, out)
    logger.info('Cleaned data written to %s' % output_file)
    logger.info('Edits written to %s' % output_edits_file)
    return
Exemplo n.º 10
0
def pygeons_crop(input_file,
                 start_date=None,
                 stop_date=None,
                 min_lat=-np.inf,
                 max_lat=np.inf,
                 min_lon=-np.inf,
                 max_lon=np.inf,
                 stations=None,
                 output_stem=None):
    '''
    Sets the time span of the data set to be between *start_date* and
    *stop_date*, restricts the stations to the latitude and longitude
    bounds, and writes the cropped data set to an HDF5 file.

    Parameters
    ----------
    input_file : str
      name of the input HDF5 data file

    start_date : str, optional
      start date of output data set in YYYY-MM-DD. Defaults to the
      earliest date in the data set.

    stop_date : str, optional
      stop date of output data set in YYYY-MM-DD. Defaults to the
      latest date in the data set.

    min_lon, max_lon, min_lat, max_lat : float, optional
      spatial bounds on the output data set

    stations : str list, optional
      list of stations to be removed from the dataset. This is in
      addition to the stations removed by the lon/lat bounds.

    output_stem : str, optional
      stem for the output file name; defaults to the input file name
      with a '.crop' suffix
    '''
    logger.info('Running pygeons crop ...')
    data = dict_from_hdf5(input_file)
    # items() instead of the python-2-only iteritems()
    out = dict((k, np.copy(v)) for k, v in data.items())

    if start_date is None:
        start_date = mjd.mjd_inv(data['time'].min(), '%Y-%m-%d')

    if stop_date is None:
        stop_date = mjd.mjd_inv(data['time'].max(), '%Y-%m-%d')

    if stations is None:
        stations = []

    # remove times that are not within the bounds of *start_date* and
    # *stop_date*
    start_time = int(mjd.mjd(start_date, '%Y-%m-%d'))
    stop_time = int(mjd.mjd(stop_date, '%Y-%m-%d'))
    idx = ((data['time'] >= start_time) & (data['time'] <= stop_time))
    out['time'] = out['time'][idx]
    # *direction* avoids shadowing the builtin dir()
    for direction in ['east', 'north', 'vertical']:
        out[direction] = out[direction][idx, :]
        out[direction + '_std_dev'] = out[direction + '_std_dev'][idx, :]

    # find stations that are within the bounds
    in_bounds = ((data['longitude'] > min_lon) & (data['longitude'] < max_lon)
                 & (data['latitude'] > min_lat) & (data['latitude'] < max_lat))
    # find stations that are in the list of stations to be removed
    in_list = np.array([i in stations for i in data['id']])
    # keep stations that are in bounds and not in the list
    idx, = (in_bounds & ~in_list).nonzero()

    out['id'] = out['id'][idx]
    out['longitude'] = out['longitude'][idx]
    out['latitude'] = out['latitude'][idx]
    for direction in ['east', 'north', 'vertical']:
        out[direction] = out[direction][:, idx]
        out[direction + '_std_dev'] = out[direction + '_std_dev'][:, idx]

    # set output file name
    if output_stem is None:
        output_stem = _remove_extension(input_file) + '.crop'

    output_file = output_stem + '.h5'
    hdf5_from_dict(output_file, out)
    logger.info('Cropped data written to %s' % output_file)
    return
Exemplo n.º 11
0
def pygeons_strain(input_file,
                   network_prior_model=('spwen12-se',),
                   network_prior_params=(1.0,0.1,100.0),
                   network_noise_model=(),
                   network_noise_params=(),
                   station_noise_model=('linear',),
                   station_noise_params=(),
                   start_date=None,stop_date=None,
                   positions=None,positions_file=None,
                   rate=True,vertical=True,covariance=False,
                   output_stem=None):
  '''
  Calculates deformation gradients from a displacement dataset and
  writes the x and y gradient datasets to <stem>.dudx.h5 and
  <stem>.dudy.h5.

  Parameters
  ----------
  input_file : str
    name of the input HDF5 data file. It must contain displacements
    (time exponent 0, space exponent 1).

  network_prior_model, network_prior_params :
    network prior model and its hyperparameters

  network_noise_model, network_noise_params :
    network noise model and its hyperparameters

  station_noise_model, station_noise_params :
    station noise model and its hyperparameters

  start_date, stop_date : str, optional
    output time span in YYYY-MM-DD. Defaults to the time span of the
    input data.

  positions : str list, optional
    output positions given as repeated (id, lon, lat) triplets

  positions_file : str, optional
    file with one column each for id, longitude, and latitude

  rate : bool, optional
    if True, output velocity gradients; otherwise displacement
    gradients

  vertical : bool, optional
    if False, the vertical deformation gradients are set to zero
    rather than computed

  covariance : bool, optional
    if True, posterior covariances are also saved

  output_stem : str, optional
    stem for the output file names; defaults to the input file name
    with a '.strain' suffix
  '''
  logger.info('Running pygeons strain ...')
  data = dict_from_hdf5(input_file)
  if data['time_exponent'] != 0:
    raise ValueError('input dataset must have units of displacement')

  if data['space_exponent'] != 1:
    raise ValueError('input dataset must have units of displacement')

  # items() instead of the python-2-only iteritems()
  out_dx = dict((k,np.copy(v)) for k,v in data.items())
  out_dy = dict((k,np.copy(v)) for k,v in data.items())

  # convert params to a dictionary of hyperparameters for each direction
  network_prior_params = _params_dict(network_prior_params)
  network_noise_params = _params_dict(network_noise_params)
  station_noise_params = _params_dict(station_noise_params)

  # convert geodetic input positions to cartesian
  bm = make_basemap(data['longitude'],data['latitude'])
  x,y = bm(data['longitude'],data['latitude'])
  xy = np.array([x,y]).T

  # set output positions
  if (positions is None) and (positions_file is None):
    # no output positions were specified so return the solution at the
    # input data positions
    output_id = np.array(data['id'],copy=True)
    output_lon = np.array(data['longitude'],copy=True)
    output_lat = np.array(data['latitude'],copy=True)

  else:
    output_id = np.zeros((0,),dtype=str)
    output_lon = np.zeros((0,),dtype=float)
    output_lat = np.zeros((0,),dtype=float)
    if positions_file is not None:
      # if positions file was specified
      pos = np.loadtxt(positions_file,dtype=str,ndmin=2)
      if pos.shape[1] != 3:
        raise ValueError(
          'positions file must contain a column for IDs, longitudes, '
          'and latitudes')

      output_id = np.hstack((output_id,pos[:,0]))
      output_lon = np.hstack((output_lon,pos[:,1].astype(float)))
      output_lat = np.hstack((output_lat,pos[:,2].astype(float)))

    if positions is not None:
      # if positions were specified via the command line
      pos = np.array(positions,dtype=str).reshape((-1,3))
      output_id = np.hstack((output_id,pos[:,0]))
      output_lon = np.hstack((output_lon,pos[:,1].astype(float)))
      output_lat = np.hstack((output_lat,pos[:,2].astype(float)))

  # convert geodetic output positions to cartesian
  output_x,output_y = bm(output_lon,output_lat)
  output_xy = np.array([output_x,output_y]).T

  # set output times
  if start_date is None:
    start_date = mjd_inv(np.min(data['time']),'%Y-%m-%d')

  if stop_date is None:
    stop_date = mjd_inv(np.max(data['time']),'%Y-%m-%d')

  start_time = mjd(start_date,'%Y-%m-%d')
  stop_time = mjd(stop_date,'%Y-%m-%d')
  output_time = np.arange(start_time,stop_time+1)

  # set output file names
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.strain'

  output_dx_file = output_stem + '.dudx.h5'
  output_dy_file = output_stem + '.dudy.h5'

  _log_strain(input_file,
              network_prior_model,network_prior_params,
              network_noise_model,network_noise_params,
              station_noise_model,station_noise_params,
              start_date,stop_date,output_id,rate,vertical,
              covariance,output_dx_file,output_dy_file)

  # *direction* avoids shadowing the builtin dir()
  for direction in ['east','north','vertical']:
    if (direction == 'vertical') and (not vertical):
      logger.debug('Not computing vertical deformation gradients')
      # do not compute the deformation gradients for vertical. Just
      # return zeros.
      dx = np.zeros((output_time.shape[0],output_xy.shape[0]))
      sdx = np.zeros((output_time.shape[0],output_xy.shape[0]))
      dy = np.zeros((output_time.shape[0],output_xy.shape[0]))
      sdy = np.zeros((output_time.shape[0],output_xy.shape[0]))
      if covariance:
        # if covariance is True then create an empty array of
        # covariances
        cdx = np.zeros((output_time.shape[0],output_xy.shape[0],
                        output_time.shape[0],output_xy.shape[0]))
        cdy = np.zeros((output_time.shape[0],output_xy.shape[0],
                        output_time.shape[0],output_xy.shape[0]))
        soln = (dx,sdx,cdx,dy,sdy,cdy)

      else:
        soln = (dx,sdx,dy,sdy)

    else:
      soln = strain(t=data['time'][:,None],
                    x=xy,
                    d=data[direction],
                    sd=data[direction+'_std_dev'],
                    network_prior_model=network_prior_model,
                    network_prior_params=network_prior_params[direction],
                    network_noise_model=network_noise_model,
                    network_noise_params=network_noise_params[direction],
                    station_noise_model=station_noise_model,
                    station_noise_params=station_noise_params[direction],
                    out_t=output_time[:,None],
                    out_x=output_xy,
                    rate=rate,
                    covariance=covariance)

    if covariance:
      # soln contains six entries when covariance is True
      dx,sdx,cdx,dy,sdy,cdy = soln
      out_dx[direction] = dx
      out_dx[direction+'_std_dev'] = sdx
      out_dx[direction+'_covariance'] = cdx
      out_dy[direction] = dy
      out_dy[direction+'_std_dev'] = sdy
      out_dy[direction+'_covariance'] = cdy

    else:
      # soln contains four entries when covariance is False
      dx,sdx,dy,sdy = soln
      out_dx[direction] = dx
      out_dx[direction+'_std_dev'] = sdx
      out_dy[direction] = dy
      out_dy[direction+'_std_dev'] = sdy

  out_dx['time'] = output_time
  out_dx['longitude'] = output_lon
  out_dx['latitude'] = output_lat
  out_dx['id'] = output_id
  out_dx['time_exponent'] = -int(rate)
  out_dx['space_exponent'] = 0

  out_dy['time'] = output_time
  out_dy['longitude'] = output_lon
  out_dy['latitude'] = output_lat
  out_dy['id'] = output_id
  out_dy['time_exponent'] = -int(rate)
  out_dy['space_exponent'] = 0

  hdf5_from_dict(output_dx_file,out_dx)
  hdf5_from_dict(output_dy_file,out_dy)
  if rate:
    logger.info('Posterior velocity gradients written to %s and %s'
                % (output_dx_file,output_dy_file))

  else:
    logger.info('Posterior displacement gradients written to %s and %s'
                % (output_dx_file,output_dy_file))

  return
Exemplo n.º 12
0
def pygeons_clean(input_file,resolution='i',
                  input_edits_file=None,
                  break_lons=None,break_lats=None,
                  break_conn=None,no_display=False,
                  output_stem=None,**kwargs):
  '''
  Runs the PyGeoNS Interactive Cleaner, then writes the cleaned
  dataset (<stem>.h5) and a log of all edits (<stem>.txt).

  Parameters
  ----------
    input_file : str
      name of the input HDF5 data file

    resolution : str
      basemap resolution

    input_edits_file : str
      Name of the file containing edits which will automatically be
      applied before opening up the interactive viewer.

    no_display : bool
      if True, the interactive viewer is not opened

    output_stem : str
      stem for the output file names; defaults to the input file
      name with a '.clean' suffix

    **kwargs :
      gets passed to InteractiveCleaner

  Notes
  -----
    *break_lons*, *break_lats*, and *break_conn* are accepted but
    not used in this function.
  '''
  logger.info('Running pygeons clean ...')
  data = dict_from_hdf5(input_file)
  # items() instead of the python-2-only iteritems()
  out = dict((k,np.copy(v)) for k,v in data.items())

  ts_fig,ts_ax = plt.subplots(3,1,sharex=True,num='Time Series View',facecolor='white')
  _setup_ts_ax(ts_ax)
  map_fig,map_ax = plt.subplots(num='Map View',facecolor='white')
  bm = make_basemap(data['longitude'],data['latitude'],resolution=resolution)
  _setup_map_ax(bm,map_ax)
  x,y = bm(data['longitude'],data['latitude'])
  pos = np.array([x,y]).T
  t = data['time']
  dates = [mjd_inv(ti,'%Y-%m-%d') for ti in t]
  units = _unit_string(data['space_exponent'],data['time_exponent'])
  # factor converting days and meters to the units in *units*
  conv = 1.0/unit_conversion(units,time='day',space='m')
  u = conv*data['east']
  v = conv*data['north']
  z = conv*data['vertical']
  su = conv*data['east_std_dev']
  sv = conv*data['north_std_dev']
  sz = conv*data['vertical_std_dev']
  ic = InteractiveCleaner(
         t,pos,u=u,v=v,z=z,su=su,sv=sv,sz=sz,
         map_ax=map_ax,ts_ax=ts_ax,
         time_labels=dates,
         units=units,
         station_labels=data['id'],
         **kwargs)

  # make edits to the data set prior to displaying it
  if input_edits_file is not None:
    with open(input_edits_file,'r') as fin:
      for line in fin:
        # ignore blank lines
        if line.isspace():
          continue

        # *edit_type* avoids shadowing the builtin type()
        edit_type,sta,a,b = line.strip().split()
        # set the current station in *ic* to the station for this edit
        xidx, = (data['id'] == sta).nonzero()
        if len(xidx) == 0:
          # continue because the station does not exist in this
          # dataset
          continue

        ic.xidx = xidx[0]
        if edit_type == 'outliers':
          start_time = mjd(a,'%Y-%m-%d')
          stop_time = mjd(b,'%Y-%m-%d')
          ic.remove_outliers(start_time,stop_time)
        elif edit_type == 'jump':
          jump_time = mjd(a,'%Y-%m-%d')
          delta = int(b)
          ic.remove_jump(jump_time,delta)
        else:
          raise ValueError('edit type must be either "outliers" or "jump"')

  if not no_display:
    ic.update()
    ic.connect()

  # set output file name
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.clean'

  output_file = output_stem + '.h5'
  output_edits_file = output_stem + '.txt'

  with open(output_edits_file,'w') as fout:
    for entry in ic.log:
      edit_type,xidx,a,b = entry
      if edit_type == 'outliers':
        station = data['id'][xidx]
        start_date = mjd_inv(a,'%Y-%m-%d')
        stop_date = mjd_inv(b,'%Y-%m-%d')
        fout.write('outliers %s %s %s\n' % (station,start_date,stop_date))
      elif edit_type == 'jump':
        station = data['id'][xidx]
        jump_date = mjd_inv(a,'%Y-%m-%d')
        fout.write('jump     %s %s %s\n' % (station,jump_date,b))
      else:
        raise ValueError('edit type must be either "outliers" or "jump"')

  logger.info('Edits saved to %s' % output_edits_file)
  clean_data = ic.get_data()
  # undo the unit conversion before saving
  out['east'] = clean_data[0]/conv
  out['north'] = clean_data[1]/conv
  out['vertical'] = clean_data[2]/conv
  out['east_std_dev'] = clean_data[3]/conv
  out['north_std_dev'] = clean_data[4]/conv
  out['vertical_std_dev'] = clean_data[5]/conv

  hdf5_from_dict(output_file,out)
  logger.info('Cleaned data written to %s' % output_file)
  logger.info('Edits written to %s' % output_edits_file)
  return
Exemplo n.º 13
0
def pygeons_crop(input_file,start_date=None,stop_date=None,
                 min_lat=-np.inf,max_lat=np.inf,
                 min_lon=-np.inf,max_lon=np.inf,
                 stations=None,output_stem=None):
  '''
  Sets the time span of the data set to be between *start_date* and
  *stop_date*, restricts the stations to the latitude and longitude
  bounds, and writes the cropped data set to an HDF5 file.

  Parameters
  ----------
  input_file : str
    name of the input HDF5 data file

  start_date : str, optional
    start date of output data set in YYYY-MM-DD. Defaults to the
    earliest date in the data set.

  stop_date : str, optional
    Stop date of output data set in YYYY-MM-DD. Defaults to the
    latest date in the data set.

  min_lon, max_lon, min_lat, max_lat : float, optional
    Spatial bounds on the output data set

  stations : str list, optional
    List of stations to be removed from the dataset. This is in
    addition to the stations removed by the lon/lat bounds.

  output_stem : str, optional
    stem for the output file name; defaults to the input file name
    with a '.crop' suffix
  '''
  logger.info('Running pygeons crop ...')
  data = dict_from_hdf5(input_file)
  # items() instead of the python-2-only iteritems()
  out = dict((k,np.copy(v)) for k,v in data.items())

  if start_date is None:
    start_date = mjd.mjd_inv(data['time'].min(),'%Y-%m-%d')

  if stop_date is None:
    stop_date = mjd.mjd_inv(data['time'].max(),'%Y-%m-%d')

  if stations is None:
    stations = []

  # remove times that are not within the bounds of *start_date* and
  # *stop_date*
  start_time = int(mjd.mjd(start_date,'%Y-%m-%d'))
  stop_time = int(mjd.mjd(stop_date,'%Y-%m-%d'))
  idx = ((data['time'] >= start_time) &
         (data['time'] <= stop_time))
  out['time'] = out['time'][idx]
  # *direction* avoids shadowing the builtin dir()
  for direction in ['east','north','vertical']:
    out[direction] = out[direction][idx,:]
    out[direction + '_std_dev'] = out[direction + '_std_dev'][idx,:]

  # find stations that are within the bounds
  in_bounds = ((data['longitude'] > min_lon) &
               (data['longitude'] < max_lon) &
               (data['latitude'] > min_lat) &
               (data['latitude'] < max_lat))
  # find stations that are in the list of stations to be removed
  in_list = np.array([i in stations for i in data['id']])
  # keep stations that are in bounds and not in the list
  idx, = (in_bounds & ~in_list).nonzero()

  out['id'] = out['id'][idx]
  out['longitude'] = out['longitude'][idx]
  out['latitude'] = out['latitude'][idx]
  for direction in ['east','north','vertical']:
    out[direction] = out[direction][:,idx]
    out[direction + '_std_dev'] = out[direction + '_std_dev'][:,idx]

  # set output file name
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.crop'

  output_file = output_stem + '.h5'
  hdf5_from_dict(output_file,out)
  logger.info('Cropped data written to %s' % output_file)
  return