def pygeons_autoclean(input_file,
                      network_model=('spwen12-se',),
                      network_params=(1.0,0.1,100.0),
                      station_model=('linear',),
                      station_params=(),
                      output_stem=None,
                      outlier_tol=4.0):
  '''
  Remove outliers with a data editing algorithm
  '''
  logger.info('Running pygeons autoclean ...')
  data = dict_from_hdf5(input_file)
  if data['time_exponent'] != 0:
    raise ValueError('input dataset must have units of displacement')

  if data['space_exponent'] != 1:
    raise ValueError('input dataset must have units of displacement')

  # dictionary which will contain the edited data
  out = dict((k,np.copy(v)) for k,v in data.iteritems())
  # convert params to a dictionary of hyperparameters for each direction
  network_params = _params_dict(network_params)
  station_params = _params_dict(station_params)
  # make output file name
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.autoclean'

  output_file = output_stem + '.h5'
  # convert geodetic positions to cartesian
  bm = make_basemap(data['longitude'],data['latitude'])
  x,y = bm(data['longitude'],data['latitude'])
  xy = np.array([x,y]).T
  _log_autoclean(input_file,
                 network_model,network_params,
                 station_model,station_params,
                 outlier_tol,
                 output_file)
  for dir in ['east','north','vertical']:
    de,sde = autoclean(t=data['time'][:,None],
                       x=xy,
                       d=data[dir],
                       sd=data[dir+'_std_dev'],
                       network_model=network_model,
                       network_params=network_params[dir],
                       station_model=station_model,
                       station_params=station_params[dir],
                       tol=outlier_tol)
    out[dir] = de
    out[dir+'_std_dev'] = sde

  hdf5_from_dict(output_file,out)
  logger.info('Edited data written to %s' % output_file)
  return

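# A hedged usage sketch for pygeons_autoclean. The input file name
# 'data.h5' is hypothetical; the model and hyperparameter values shown are
# the defaults defined above. The edited dataset is written to
# 'data.autoclean.h5'.
pygeons_autoclean('data.h5',
                  network_model=('spwen12-se',),
                  network_params=(1.0,0.1,100.0),
                  station_model=('linear',),
                  station_params=(),
                  outlier_tol=4.0)
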
def pygeons_fit(input_file,
                network_model=('spwen12-se',),
                network_params=(1.0,0.1,100.0),
                station_model=('linear',),
                station_params=(),
                output_stem=None):
  '''
  Condition the Gaussian process to the observations and evaluate the
  posterior at the observation points.
  '''
  logger.info('Running pygeons fit ...')
  data = dict_from_hdf5(input_file)
  if data['time_exponent'] != 0:
    raise ValueError('input dataset must have units of displacement')

  if data['space_exponent'] != 1:
    raise ValueError('input dataset must have units of displacement')

  # create output dictionary
  out = dict((k,np.copy(v)) for k,v in data.iteritems())
  # convert params to a dictionary of hyperparameters for each direction
  network_params = _params_dict(network_params)
  station_params = _params_dict(station_params)
  # make output file name
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.fit'

  output_file = output_stem + '.h5'
  # convert geodetic positions to cartesian
  bm = make_basemap(data['longitude'],data['latitude'])
  x,y = bm(data['longitude'],data['latitude'])
  xy = np.array([x,y]).T
  _log_fit(input_file,
           network_model,network_params,
           station_model,station_params,
           output_file)
  for dir in ['east','north','vertical']:
    u,su = fit(t=data['time'][:,None],
               x=xy,
               d=data[dir],
               sd=data[dir+'_std_dev'],
               network_model=network_model,
               network_params=network_params[dir],
               station_model=station_model,
               station_params=station_params[dir])
    out[dir] = u
    out[dir+'_std_dev'] = su

  hdf5_from_dict(output_file,out)
  logger.info('Posterior fit written to %s' % output_file)
  return

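# _params_dict is not defined in this file. Judging from how it is used
# above (a flat tuple of hyperparameters in, one tuple per data component
# out), a minimal sketch would broadcast the same values to all three
# components. This is an assumption about its behavior, not the actual
# PyGeoNS helper, so it is given a different name here.
def _params_dict_sketch(params):
  return {'east':tuple(params),
          'north':tuple(params),
          'vertical':tuple(params)}
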
def pygeons_strain_view(xdiff_file,ydiff_file,map_resolution='i',**kwargs):
  '''
  runs the PyGeoNS Interactive Strain Viewer

  Parameters
  ----------
  xdiff_file : str
    name of the file containing the x derivatives of the deformation

  ydiff_file : str
    name of the file containing the y derivatives of the deformation

  map_resolution : str
    basemap resolution

  **kwargs :
    gets passed to interactive_strain_viewer

  '''
  logger.info('Running pygeons strain-view ...')
  data_dx = dict_from_hdf5(xdiff_file)
  data_dy = dict_from_hdf5(ydiff_file)
  data_dx,data_dy = _common_context([data_dx,data_dy])
  if ((data_dx['space_exponent'] != 0) |
      (data_dy['space_exponent'] != 0)):
    raise ValueError('The input datasets cannot have spatial units')

  t = data_dx['time']
  id = data_dx['id']
  lon = data_dx['longitude']
  lat = data_dx['latitude']
  dates = [mjd_inv(ti,'%Y-%m-%d') for ti in t]
  units = _unit_string(data_dx['space_exponent'],
                       data_dx['time_exponent'])
  # factor that converts units of days and m to the units in *units*
  conv = 1.0/unit_conversion(units,time='day',space='m')
  exx = conv*data_dx['east']
  eyy = conv*data_dy['north']
  exy = 0.5*conv*(data_dx['north'] + data_dy['east'])
  sxx = conv*data_dx['east_std_dev']
  syy = conv*data_dy['north_std_dev']
  sxy = 0.5*conv*np.sqrt(data_dx['north_std_dev']**2 +
                         data_dy['east_std_dev']**2)
  ts_fig,ts_ax = plt.subplots(3,1,sharex=True,num='Time Series View',
                              facecolor='white')
  _setup_ts_ax(ts_ax)
  map_fig,map_ax = plt.subplots(num='Map View',facecolor='white')
  bm = make_basemap(lon,lat,resolution=map_resolution)
  _setup_map_ax(bm,map_ax)
  x,y = bm(lon,lat)
  pos = np.array([x,y]).T
  interactive_strain_viewer(
    t,pos,exx,eyy,exy,sxx=sxx,syy=syy,sxy=sxy,
    map_ax=map_ax,ts_ax=ts_ax,time_labels=dates,
    station_labels=id,units=units,**kwargs)
  return

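# A hedged usage sketch: view strains from the pair of deformation-gradient
# files written by pygeons_strain (defined below), which follow the
# '.dudx.h5'/'.dudy.h5' naming convention. The stem 'data.strain' is
# hypothetical.
pygeons_strain_view('data.strain.dudx.h5',
                    'data.strain.dudy.h5',
                    map_resolution='i')
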
def pygeons_vector_view(input_files,map_resolution='i',**kwargs):
  '''
  runs the PyGeoNS interactive vector viewer

  Parameters
  ----------
  input_files : (N,) list of str
    names of the data files being plotted

  map_resolution : str
    basemap resolution

  **kwargs :
    gets passed to interactive_vector_viewer

  '''
  logger.info('Running pygeons vector-view ...')
  data_list = [dict_from_hdf5(i) for i in input_files]
  data_list = _common_context(data_list)
  # use filenames for dataset labels if none were provided
  dataset_labels = kwargs.pop('dataset_labels',input_files)
  t = data_list[0]['time']
  lon = data_list[0]['longitude']
  lat = data_list[0]['latitude']
  id = data_list[0]['id']
  dates = [mjd_inv(ti,'%Y-%m-%d') for ti in t]
  units = _unit_string(data_list[0]['space_exponent'],
                       data_list[0]['time_exponent'])
  # factor that converts units of days and m to the units in *units*
  conv = 1.0/unit_conversion(units,time='day',space='m')
  u = [conv*d['east'] for d in data_list]
  v = [conv*d['north'] for d in data_list]
  z = [conv*d['vertical'] for d in data_list]
  su = [conv*d['east_std_dev'] for d in data_list]
  sv = [conv*d['north_std_dev'] for d in data_list]
  sz = [conv*d['vertical_std_dev'] for d in data_list]
  ts_fig,ts_ax = plt.subplots(3,1,sharex=True,num='Time Series View',
                              facecolor='white')
  _setup_ts_ax(ts_ax)
  map_fig,map_ax = plt.subplots(num='Map View',facecolor='white')
  bm = make_basemap(lon,lat,resolution=map_resolution)
  _setup_map_ax(bm,map_ax)
  x,y = bm(lon,lat)
  pos = np.array([x,y]).T
  interactive_vector_viewer(
    t,pos,u=u,v=v,z=z,su=su,sv=sv,sz=sz,
    ts_ax=ts_ax,map_ax=map_ax,
    dataset_labels=dataset_labels,
    station_labels=id,time_labels=dates,
    units=units,**kwargs)
  return

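# A hedged usage sketch: compare a raw dataset with its cleaned and fitted
# versions in the interactive vector viewer. The file names are
# hypothetical; by default they also serve as the dataset labels.
pygeons_vector_view(['data.h5','data.clean.h5','data.fit.h5'],
                    map_resolution='i')
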
def make_data(pos,times):
  # ~1 microstrain per year expressed in strain per day (1.0e-6/365.25)
  ms = 2.737e-9
  x,y = pos.T
  _,xg = np.meshgrid(times,x,indexing='ij')
  tg,yg = np.meshgrid(times,y,indexing='ij')
  # uniform strain rate: dv/dy = 3 microstrain/yr, all other gradients zero
  u = 0.0*ms*tg*xg + 0.0*ms*tg*yg
  v = 0.0*ms*tg*xg + 3.0*ms*tg*yg
  z = 0.0*tg
  # reference the displacements to the first epoch
  u = u - u[0,:]
  v = v - v[0,:]
  z = z - z[0,:]
  return u,v,z

lon = np.array([-83.3,-82.75,-85.26,-83.36])
lat = np.array([42.31,42.91,45.20,42.92])
id = np.array(['STA1','STA2','STA3','STA4'])
bm = make_basemap(lon,lat)
x,y = bm(lon,lat)
xy = np.array([x,y]).T
start_date = mjd('2000-01-01','%Y-%m-%d')
stop_date = mjd('2000-02-01','%Y-%m-%d')
times = np.arange(start_date,stop_date+1)
u,v,z = make_data(xy,times)
# 1 mm standard deviation on every component
su = 0.001*np.ones_like(u)
sv = 0.001*np.ones_like(v)
sz = 0.001*np.ones_like(z)
u += np.random.normal(0.0,su)
v += np.random.normal(0.0,sv)
z += np.random.normal(0.0,sz)
data = {}
data['id'] = id

def pygeons_clean(input_file,resolution='i',
                  input_edits_file=None,
                  break_lons=None,break_lats=None,
                  break_conn=None,no_display=False,
                  output_stem=None,**kwargs):
  '''
  runs the PyGeoNS Interactive Cleaner

  Parameters
  ----------
  input_file : str
    name of the input data file

  resolution : str
    basemap resolution

  input_edits_file : str
    Name of the file containing edits which will automatically be
    applied before opening up the interactive viewer.

  output_stem : str
    Stem for the output files. The cleaned data are written to
    <output_stem>.h5 and all edits are recorded in <output_stem>.txt.

  **kwargs :
    gets passed to InteractiveCleaner

  '''
  logger.info('Running pygeons clean ...')
  data = dict_from_hdf5(input_file)
  out = dict((k,np.copy(v)) for k,v in data.iteritems())
  ts_fig,ts_ax = plt.subplots(3,1,sharex=True,num='Time Series View',
                              facecolor='white')
  _setup_ts_ax(ts_ax)
  map_fig,map_ax = plt.subplots(num='Map View',facecolor='white')
  bm = make_basemap(data['longitude'],data['latitude'],
                    resolution=resolution)
  _setup_map_ax(bm,map_ax)
  x,y = bm(data['longitude'],data['latitude'])
  pos = np.array([x,y]).T
  t = data['time']
  dates = [mjd_inv(ti,'%Y-%m-%d') for ti in t]
  units = _unit_string(data['space_exponent'],data['time_exponent'])
  conv = 1.0/unit_conversion(units,time='day',space='m')
  u = conv*data['east']
  v = conv*data['north']
  z = conv*data['vertical']
  su = conv*data['east_std_dev']
  sv = conv*data['north_std_dev']
  sz = conv*data['vertical_std_dev']
  ic = InteractiveCleaner(
    t,pos,u=u,v=v,z=z,su=su,sv=sv,sz=sz,
    map_ax=map_ax,ts_ax=ts_ax,
    time_labels=dates,
    units=units,
    station_labels=data['id'],
    **kwargs)
  # make edits to the data set prior to displaying it
  if input_edits_file is not None:
    with open(input_edits_file,'r') as fin:
      for line in fin:
        # ignore blank lines
        if line.isspace():
          continue

        type,sta,a,b = line.strip().split()
        # set the current station in *ic* to the station for this edit
        xidx, = (data['id'] == sta).nonzero()
        if len(xidx) == 0:
          # continue because the station does not exist in this
          # dataset
          continue

        ic.xidx = xidx[0]
        if type == 'outliers':
          start_time = mjd(a,'%Y-%m-%d')
          stop_time = mjd(b,'%Y-%m-%d')
          ic.remove_outliers(start_time,stop_time)
        elif type == 'jump':
          jump_time = mjd(a,'%Y-%m-%d')
          delta = int(b)
          ic.remove_jump(jump_time,delta)
        else:
          raise ValueError('edit type must be either "outliers" or "jump"')

  if not no_display:
    ic.update()
    ic.connect()

  # set output file name
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.clean'

  output_file = output_stem + '.h5'
  output_edits_file = output_stem + '.txt'
  with open(output_edits_file,'w') as fout:
    for i in ic.log:
      type,xidx,a,b = i
      if type == 'outliers':
        station = data['id'][xidx]
        start_date = mjd_inv(a,'%Y-%m-%d')
        stop_date = mjd_inv(b,'%Y-%m-%d')
        fout.write('outliers %s %s %s\n' % (station,start_date,stop_date))
      elif type == 'jump':
        station = data['id'][xidx]
        jump_date = mjd_inv(a,'%Y-%m-%d')
        fout.write('jump %s %s %s\n' % (station,jump_date,b))
      else:
        raise ValueError('edit type must be either "outliers" or "jump"')

  logger.info('Edits saved to %s' % output_edits_file)
  clean_data = ic.get_data()
  out['east'] = clean_data[0]/conv
  out['north'] = clean_data[1]/conv
  out['vertical'] = clean_data[2]/conv
  out['east_std_dev'] = clean_data[3]/conv
  out['north_std_dev'] = clean_data[4]/conv
  out['vertical_std_dev'] = clean_data[5]/conv
  hdf5_from_dict(output_file,out)
  logger.info('Cleaned data written to %s' % output_file)
  logger.info('Edits written to %s' % output_edits_file)
  return

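# A minimal example of the plain-text edits file consumed through
# *input_edits_file* (and produced as *output_edits_file*). Each line is
# "<type> <station> <a> <b>", where <type> is "outliers" (a and b are
# start/stop dates) or "jump" (a is the jump date and b is an integer
# passed to InteractiveCleaner.remove_jump, plausibly a time window in
# days, though that is an assumption). The station name and dates are
# hypothetical.
with open('example_edits.txt','w') as fout:
  fout.write('outliers STA1 2016-01-01 2016-01-15\n')
  fout.write('jump STA1 2016-03-02 10\n')
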
import numpy as np
from pygeons.mjd import mjd
from pygeons.io.io import text_from_dict
from pygeons.basemap import make_basemap
import matplotlib.pyplot as plt
np.random.seed(1)

## observation points
#####################################################################
pos_geo = np.array([[-83.74,42.28,0.0],
                    [-83.08,42.33,0.0],
                    [-83.33,41.94,0.0]])
Nx = len(pos_geo)
bm = make_basemap(pos_geo[:,0],pos_geo[:,1])
pos_cart = np.array(bm(pos_geo[:,0],pos_geo[:,1])).T
dx = pos_cart[:,0] - pos_cart[0,0]
dy = pos_cart[:,1] - pos_cart[0,1]
dispdx = np.array([[0.0,1e-6,0.0]]).repeat(Nx,axis=0)
dispdy = np.array([[0.0,0.0,0.0]]).repeat(Nx,axis=0)
disp = dispdx*dx[:,None] + dispdy*dy[:,None]
u,v,z = disp.T
dudx,dvdx,dzdx = dispdx.T
dudy,dvdy,dzdy = dispdy.T
# make disp. time dependent
start_time = mjd('2015-07-01','%Y-%m-%d')
stop_time = mjd('2017-07-01','%Y-%m-%d')
peak_time = float(mjd('2016-07-01','%Y-%m-%d'))
times = np.arange(start_time,stop_time+1).astype(float)
Nt = len(times)

def pygeons_strain(input_file,
                   network_prior_model=('spwen12-se',),
                   network_prior_params=(1.0,0.1,100.0),
                   network_noise_model=(),
                   network_noise_params=(),
                   station_noise_model=('linear',),
                   station_noise_params=(),
                   start_date=None,stop_date=None,
                   positions=None,positions_file=None,
                   rate=True,vertical=True,covariance=False,
                   output_stem=None):
  '''
  calculates strain
  '''
  logger.info('Running pygeons strain ...')
  data = dict_from_hdf5(input_file)
  if data['time_exponent'] != 0:
    raise ValueError('input dataset must have units of displacement')

  if data['space_exponent'] != 1:
    raise ValueError('input dataset must have units of displacement')

  out_dx = dict((k,np.copy(v)) for k,v in data.iteritems())
  out_dy = dict((k,np.copy(v)) for k,v in data.iteritems())
  # convert params to a dictionary of hyperparameters for each direction
  network_prior_params = _params_dict(network_prior_params)
  network_noise_params = _params_dict(network_noise_params)
  station_noise_params = _params_dict(station_noise_params)
  # convert geodetic input positions to cartesian
  bm = make_basemap(data['longitude'],data['latitude'])
  x,y = bm(data['longitude'],data['latitude'])
  xy = np.array([x,y]).T
  # set output positions
  if (positions is None) & (positions_file is None):
    # no output positions were specified so return the solution at the
    # input data positions
    output_id = np.array(data['id'],copy=True)
    output_lon = np.array(data['longitude'],copy=True)
    output_lat = np.array(data['latitude'],copy=True)
  else:
    output_id = np.zeros((0,),dtype=str)
    output_lon = np.zeros((0,),dtype=float)
    output_lat = np.zeros((0,),dtype=float)
    if positions_file is not None:
      # if positions file was specified
      pos = np.loadtxt(positions_file,dtype=str,ndmin=2)
      if pos.shape[1] != 3:
        raise ValueError(
          'positions file must contain a column for IDs, longitudes, '
          'and latitudes')

      output_id = np.hstack((output_id,pos[:,0]))
      output_lon = np.hstack((output_lon,pos[:,1].astype(float)))
      output_lat = np.hstack((output_lat,pos[:,2].astype(float)))

    if positions is not None:
      # if positions were specified via the command line
      pos = np.array(positions,dtype=str).reshape((-1,3))
      output_id = np.hstack((output_id,pos[:,0]))
      output_lon = np.hstack((output_lon,pos[:,1].astype(float)))
      output_lat = np.hstack((output_lat,pos[:,2].astype(float)))

  # convert geodetic output positions to cartesian
  output_x,output_y = bm(output_lon,output_lat)
  output_xy = np.array([output_x,output_y]).T
  # set output times
  if start_date is None:
    start_date = mjd_inv(np.min(data['time']),'%Y-%m-%d')

  if stop_date is None:
    stop_date = mjd_inv(np.max(data['time']),'%Y-%m-%d')

  start_time = mjd(start_date,'%Y-%m-%d')
  stop_time = mjd(stop_date,'%Y-%m-%d')
  output_time = np.arange(start_time,stop_time+1)
  # set output file names
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.strain'

  output_dx_file = output_stem + '.dudx.h5'
  output_dy_file = output_stem + '.dudy.h5'
  _log_strain(input_file,
              network_prior_model,network_prior_params,
              network_noise_model,network_noise_params,
              station_noise_model,station_noise_params,
              start_date,stop_date,output_id,rate,vertical,
              covariance,output_dx_file,output_dy_file)
  for dir in ['east','north','vertical']:
    if (dir == 'vertical') & (not vertical):
      logger.debug('Not computing vertical deformation gradients')
      # do not compute the deformation gradients for vertical. Just
      # return zeros.
      dx = np.zeros((output_time.shape[0],output_xy.shape[0]))
      sdx = np.zeros((output_time.shape[0],output_xy.shape[0]))
      dy = np.zeros((output_time.shape[0],output_xy.shape[0]))
      sdy = np.zeros((output_time.shape[0],output_xy.shape[0]))
      if covariance:
        # if covariance is True then create an empty array of
        # covariances
        cdx = np.zeros((output_time.shape[0],output_xy.shape[0],
                        output_time.shape[0],output_xy.shape[0]))
        cdy = np.zeros((output_time.shape[0],output_xy.shape[0],
                        output_time.shape[0],output_xy.shape[0]))
        soln = (dx,sdx,cdx,dy,sdy,cdy)
      else:
        soln = (dx,sdx,dy,sdy)

    else:
      soln = strain(t=data['time'][:,None],
                    x=xy,
                    d=data[dir],
                    sd=data[dir+'_std_dev'],
                    network_prior_model=network_prior_model,
                    network_prior_params=network_prior_params[dir],
                    network_noise_model=network_noise_model,
                    network_noise_params=network_noise_params[dir],
                    station_noise_model=station_noise_model,
                    station_noise_params=station_noise_params[dir],
                    out_t=output_time[:,None],
                    out_x=output_xy,
                    rate=rate,
                    covariance=covariance)

    if covariance:
      # soln contains six entries when covariance is True
      dx,sdx,cdx,dy,sdy,cdy = soln
      out_dx[dir] = dx
      out_dx[dir+'_std_dev'] = sdx
      out_dx[dir+'_covariance'] = cdx
      out_dy[dir] = dy
      out_dy[dir+'_std_dev'] = sdy
      out_dy[dir+'_covariance'] = cdy
    else:
      # soln contains four entries when covariance is False
      dx,sdx,dy,sdy = soln
      out_dx[dir] = dx
      out_dx[dir+'_std_dev'] = sdx
      out_dy[dir] = dy
      out_dy[dir+'_std_dev'] = sdy

  out_dx['time'] = output_time
  out_dx['longitude'] = output_lon
  out_dx['latitude'] = output_lat
  out_dx['id'] = output_id
  out_dx['time_exponent'] = -int(rate)
  out_dx['space_exponent'] = 0
  out_dy['time'] = output_time
  out_dy['longitude'] = output_lon
  out_dy['latitude'] = output_lat
  out_dy['id'] = output_id
  out_dy['time_exponent'] = -int(rate)
  out_dy['space_exponent'] = 0
  hdf5_from_dict(output_dx_file,out_dx)
  hdf5_from_dict(output_dy_file,out_dy)
  if rate:
    logger.info('Posterior velocity gradients written to %s and %s'
                % (output_dx_file,output_dy_file))
  else:
    logger.info('Posterior displacement gradients written to %s and %s'
                % (output_dx_file,output_dy_file))

  return

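# A minimal example of the positions file accepted by pygeons_strain via
# *positions_file*: whitespace-delimited text with one row per output
# position, containing an ID, a longitude, and a latitude, matching the
# three-column check above. The file name and entries are hypothetical.
with open('output_positions.txt','w') as fout:
  fout.write('P001 -83.50 42.50\n')
  fout.write('P002 -83.00 42.75\n')
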
def pygeons_reml(input_file,
                 network_model=('spwen12-se',),
                 network_params=(1.0,0.1,100.0),
                 network_fix=(),
                 station_model=('linear',),
                 station_params=(),
                 station_fix=(),
                 output_stem=None):
  '''
  Restricted maximum likelihood estimation
  '''
  logger.info('Running pygeons reml ...')
  data = dict_from_hdf5(input_file)
  if data['time_exponent'] != 0:
    raise ValueError('input dataset must have units of displacement')

  if data['space_exponent'] != 1:
    raise ValueError('input dataset must have units of displacement')

  # convert params to a dictionary of hyperparameters for each direction
  network_params = _params_dict(network_params)
  network_fix = np.asarray(network_fix,dtype=int)
  station_params = _params_dict(station_params)
  station_fix = np.asarray(station_fix,dtype=int)
  # make output file name
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.reml'

  output_file = output_stem + '.txt'
  # convert geodetic positions to cartesian
  bm = make_basemap(data['longitude'],data['latitude'])
  x,y = bm(data['longitude'],data['latitude'])
  xy = np.array([x,y]).T
  # call "pygeons info" on the input data file. pipe the results to
  # the output file
  sp.call('pygeons info %s > %s' % (input_file,output_file),shell=True)
  msg = _log_reml(input_file,
                  network_model,network_params,network_fix,
                  station_model,station_params,station_fix,
                  output_file)
  # write log entry to file
  with open(output_file,'a') as fout:
    fout.write(msg)

  # make a dictionary storing likelihoods
  likelihood = {}
  for dir in ['east','north','vertical']:
    net_opt,sta_opt,like = reml(t=data['time'][:,None],
                                x=xy,
                                d=data[dir],
                                sd=data[dir+'_std_dev'],
                                network_model=network_model,
                                network_params=network_params[dir],
                                network_fix=network_fix,
                                station_model=station_model,
                                station_params=station_params[dir],
                                station_fix=station_fix)
    # update the parameter dict with the optimal values
    network_params[dir] = net_opt
    station_params[dir] = sta_opt
    likelihood[dir] = like

  msg = _log_reml_results(input_file,
                          network_model,network_params,network_fix,
                          station_model,station_params,station_fix,
                          likelihood,output_file)
  # write log entry to file
  with open(output_file,'a') as fout:
    fout.write(msg)

  logger.info('Optimal parameters written to %s' % output_file)
  return

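# A hedged usage sketch for pygeons_reml. The file name is hypothetical,
# and treating the entries of *network_fix* as indices of hyperparameters
# to hold fixed during optimization is an assumption based on how they are
# forwarded to reml above. Results are appended to 'data.reml.txt'.
pygeons_reml('data.h5',
             network_model=('spwen12-se',),
             network_params=(1.0,0.1,100.0),
             network_fix=(0,),
             station_model=('linear',),
             station_params=())
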