def setup_grids(args):
    '''
    Set up the grids for the problem.

    Inputs:
        args: (dict) dictionary containing input arguments.  Required entries:
            W: dictionary with entries 'x','y','t' specifying the domain width in x, y, and time
            ctr: dictionary with entries 'x','y','t' specifying the domain center in x, y, and time
            spacing: dictionary with entries 'z0','dz' and 'dt' specifying the spacing of the z0 grid,
                the spacing of the dz grid, and the duration of the epochs
            srs_proj4: a proj4 string specifying the data projection
            mask_file: the mask file, which has 1 for points in the domain (data will be used and
                strong constraints applied)
            mask_data: pointCollection.data object containing the mask.  If this is specified,
                mask_file is ignored
    Outputs:
        grids: (dict) a dictionary with entries 'z0', 'dz' and 't', each containing an fd_grid object
        bds: (dict) a dictionary specifying the domain bounds in x, y, and t (a 2-element vector for each)

    Each grid has an assigned location to which its points are mapped in the solution vector.
    From left to right, the grids are z0, then dz.
    '''
    bds = {coord: args['ctr'][coord] + np.array([-0.5, 0.5]) * args['W'][coord]
           for coord in ('x', 'y', 't')}
    grids = dict()
    if args['mask_data'] is not None:
        mask_file = None
    else:
        mask_file = args['mask_file']
    grids['z0'] = fd_grid([bds['y'], bds['x']], args['spacing']['z0'] * np.ones(2),
                          name='z0', srs_proj4=args['srs_proj4'], mask_file=args['mask_file'],
                          mask_data=args['mask_data'])
    grids['dz'] = fd_grid([bds['y'], bds['x'], bds['t']],
                          [args['spacing']['dz'], args['spacing']['dz'], args['spacing']['dt']],
                          name='dz', col_0=grids['z0'].N_nodes, srs_proj4=args['srs_proj4'],
                          mask_file=mask_file, mask_data=args['mask_data'])
    grids['z0'].col_N = grids['dz'].col_N
    grids['t'] = fd_grid([bds['t']], [args['spacing']['dt']], name='t')
    grids['z0'].cell_area = calc_cell_area(grids['z0'])
    if np.any(grids['dz'].delta[0:2] > grids['z0'].delta):
        grids['dz'].cell_area = sum_cell_area(grids['z0'], grids['dz'])
    else:
        grids['dz'].cell_area = calc_cell_area(grids['dz'])
    return grids, bds
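# Hypothetical usage sketch (not part of the library): the keys follow the docstring above, and the
# numeric values are illustrative assumptions only (they echo test values used later in this file).
def _example_setup_grids():
    example_args = {
        'W': {'x': 6.e4, 'y': 6.e4, 't': 6},
        'ctr': {'x': -1.5e5, 'y': -2.e6, 't': 2006.0},
        'spacing': {'z0': 5.e2, 'dz': 1.e3, 'dt': 0.25},
        'srs_proj4': '+proj=stere +lat_0=90 +lat_ts=70 +lon_0=-45 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs',
        'mask_file': None,
        'mask_data': None,
    }
    grids, bds = setup_grids(example_args)
    # z0 occupies solution-vector columns 0 .. grids['z0'].N_nodes-1; the dz columns follow
    # immediately after (col_0=grids['z0'].N_nodes), consistent with the docstring above.
    return grids, bds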
def select_repeat_data(data, grids, repeat_dt, resolution):
    """
    Select data that are repeats.

    Input arguments:
        data: input data
        grids: dictionary of fd_grid objects generated by setup_grids
        repeat_dt: time interval by which repeats must be separated to count
        resolution: spatial resolution of the repeat calculation
    """
    repeat_grid = fd_grid(grids['z0'].bds, resolution * np.ones(2), name='repeat')
    t_coarse = np.round((data.time - grids['dz'].bds[2][0]) / repeat_dt) * repeat_dt
    grid_repeat_count = np.zeros(np.prod(repeat_grid.shape))
    for t_val in np.unique(t_coarse):
        # select the data points for each epoch
        ii = t_coarse == t_val
        # use lin_op.interp_mtx to find the grid points associated with each node
        grid_repeat_count += np.asarray(
            lin_op(repeat_grid).interp_mtx((data.y[ii], data.x[ii])).toCSR().sum(axis=0) > 0.5).ravel()
    data_repeats = lin_op(repeat_grid).interp_mtx((data.y, data.x)).toCSR().dot(
        (grid_repeat_count > 1).astype(np.float64))
    return data_repeats > 0.5
def setup_PS_bias(data, G_data, constraint_op_list, grids, bds, args):
    '''
    Set up a matrix to fit a smooth POCA-vs-swath bias.
    '''
    grids['PS_bias'] = fd_grid([bds['y'], bds['x']],
                               [args['spacing']['dz'], args['spacing']['dz']],
                               name='PS_bias', srs_proj4=args['srs_proj4'],
                               mask_file=args['mask_file'], mask_data=args['mask_data'],
                               col_0=grids['dz'].col_N)
    ps_mtx = lin_op(grid=grids['PS_bias'], name='PS_bias').interp_mtx(data.coords()[0:2])
    # POCA rows should have zero entries
    temp = ps_mtx.v.ravel()
    temp[np.in1d(ps_mtx.r.ravel(), np.flatnonzero(data.swath == 0))] = 0
    ps_mtx.v = temp.reshape(ps_mtx.v.shape)
    G_data.add(ps_mtx)
    # Build a constraint matrix for the curvature of the PS bias
    grad2_ps = lin_op(grids['PS_bias'], name='grad2_PS').grad2(DOF='PS_bias')
    grad2_ps.expected = args['E_RMS_d2x_PS_bias'] + np.zeros(grad2_ps.N_eq) / \
        np.sqrt(np.prod(grids['dz'].delta[0:2]))
    # Build a constraint matrix for the magnitude of the PS bias
    mag_ps = lin_op(grids['PS_bias'], name='mag_ps').data_bias(
        ind=np.arange(grids['PS_bias'].N_nodes),
        col=np.arange(grids['PS_bias'].col_0, grids['PS_bias'].col_N))
    mag_ps.expected = args['E_RMS_PS_bias'] + np.zeros(mag_ps.N_eq)
    constraint_op_list.append(grad2_ps)
    #constraint_op_list.append(grad_ps)
    constraint_op_list.append(mag_ps)
def sum_cell_area(grid_f, grid_c, cell_area_f=None, return_op=False, sub0s=None, taper=True):
    # calculate the masked cell area of a fine grid summed within each cell of a coarse grid
    if cell_area_f is None:
        cell_area_f = calc_cell_area(grid_f) * grid_f.mask
    n_k = (grid_c.delta[0:2] / grid_f.delta[0:2] + 1).astype(int)
    temp_grid = fd_grid((grid_f.bds[0:2]), deltas=grid_f.delta[0:2])
    fine_to_coarse = lin_op(grid=temp_grid).sum_to_grid3(
        n_k, sub0s=sub0s, taper=True, valid_equations_only=False, dims=[0, 1])
    result = fine_to_coarse.toCSR().dot(cell_area_f.ravel()).reshape(grid_c.shape[0:2])
    if return_op:
        return result, fine_to_coarse
    return result
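# Illustrative sketch (values assumed for illustration, mirroring the test script below): with a
# 100 m fine grid and a 1 km coarse grid, sum_cell_area builds an (n_k x n_k) summation kernel,
# so each coarse cell accumulates the masked cell areas of an 11 x 11 block of fine-grid nodes.
def _example_sum_to_grid3_kernel():
    import numpy as np
    delta_f = np.array([100., 100.])   # fine-grid spacing (m), as in the test script below
    delta_c = np.array([1.e3, 1.e3])   # coarse-grid spacing (m)
    n_k = (delta_c / delta_f + 1).astype(int)
    return n_k                         # -> array([11, 11]): the kernel size passed to lin_op.sum_to_grid3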
def setup_mask(data, grids, valid_data, bds, args):
    '''
    Mark data points for which the mask is zero as invalid.

    Inputs:
        data: (pc.data) data structure
        grids: (dict) dictionary of fd_grid objects generated by setup_grids
        valid_data: (numpy boolean array, size of data) indicates valid data points
        bds: (dict) a dictionary specifying the domain bounds in x, y, and t (a 2-element vector for each)
    '''
    temp = fd_grid([bds['y'], bds['x']], [args['spacing']['z0'], args['spacing']['z0']],
                   name='z0', srs_proj4=args['srs_proj4'], mask_file=args['mask_file'],
                   mask_data=args['mask_data'])
    data_mask = lin_op(temp, name='interp_z').interp_mtx(data.coords()[0:2]).toCSR().dot(grids['z0'].mask.ravel())
    data_mask[~np.isfinite(data_mask)] = 0
    if np.any(data_mask == 0):
        data.index(~(data_mask == 0))
        valid_data[valid_data] = ~(data_mask == 0)
def smooth_xyt_fit(**kwargs):
    required_fields = ('data', 'W', 'ctr', 'spacing', 'E_RMS')
    args = {'reference_epoch': 0,
            'W_ctr': 1e4,
            'mask_file': None,
            'mask_scale': None,
            'compute_E': False,
            'max_iterations': 10,
            'srs_WKT': None,
            'N_subset': None,
            'bias_params': None,
            'repeat_res': None,
            'repeat_dt': 1,
            'Edit_only': False,
            'dzdt_lags': [1, 4],
            'VERBOSE': True}
    args.update(kwargs)
    for field in required_fields:
        if field not in kwargs:
            raise ValueError("%s must be defined", field)
    valid_data = np.ones_like(args['data'].x, dtype=bool)
    timing = dict()

    if args['N_subset'] is not None:
        tic = time()
        valid_data = edit_data_by_subset_fit(args['N_subset'], args)
        timing['edit_by_subset'] = time() - tic
        if args['Edit_only']:
            return {'timing': timing, 'data': args['data'].copy().subset(valid_data)}
    m = dict()
    E = dict()

    # define the grids
    tic = time()
    bds = {coord: args['ctr'][coord] + np.array([-0.5, 0.5]) * args['W'][coord]
           for coord in ('x', 'y', 't')}
    grids = dict()
    grids['z0'] = fd_grid([bds['y'], bds['x']], args['spacing']['z0'] * np.ones(2),
                          name='z0', srs_WKT=args['srs_WKT'], mask_file=args['mask_file'])
    grids['dz'] = fd_grid([bds['y'], bds['x'], bds['t']],
                          [args['spacing']['dz'], args['spacing']['dz'], args['spacing']['dt']],
                          col_0=grids['z0'].N_nodes, name='dz', srs_WKT=args['srs_WKT'],
                          mask_file=args['mask_file'])
    grids['z0'].col_N = grids['dz'].col_N
    grids['t'] = fd_grid([bds['t']], [args['spacing']['dt']], name='t')

    # select only the data points that are within the grid bounds
    valid_z0 = grids['z0'].validate_pts((args['data'].coords()[0:2]))
    valid_dz = grids['dz'].validate_pts((args['data'].coords()))
    valid_data = valid_data & valid_dz & valid_z0

    # if repeat_res is given, resample the data to include only repeat data
    # (to within a spatial tolerance of repeat_res)
    if args['repeat_res'] is not None:
        valid_data[valid_data] = valid_data[valid_data] & \
            select_repeat_data(args['data'].copy().subset(valid_data), grids,
                               args['repeat_dt'], args['repeat_res'])

    # subset the data based on the valid mask
    data = args['data'].copy().subset(valid_data)

    # if we have a mask file, use it to subset the data
    # needs to be done after the valid subset because otherwise the interp_mtx for the mask file fails.
    if args['mask_file'] is not None:
        temp = fd_grid([bds['y'], bds['x']], [args['spacing']['z0'], args['spacing']['z0']],
                       name='z0', srs_WKT=args['srs_WKT'], mask_file=args['mask_file'])
        data_mask = lin_op(temp, name='interp_z').interp_mtx(data.coords()[0:2]).toCSR().dot(grids['z0'].mask.ravel())
        data_mask[~np.isfinite(data_mask)] = 0
        if np.any(data_mask == 0):
            data.subset(~(data_mask == 0))
            valid_data[valid_data] = ~(data_mask == 0)

    # define the interpolation operator, equal to the sum of the dz and z0 operators
    G_data = lin_op(grids['z0'], name='interp_z').interp_mtx(data.coords()[0:2])
    G_data.add(lin_op(grids['dz'], name='interp_dz').interp_mtx(data.coords()))

    # define the smoothness constraints
    grad2_z0 = lin_op(grids['z0'], name='grad2_z0').grad2(DOF='z0')
    grad2_dz = lin_op(grids['dz'], name='grad2_dzdt').grad2_dzdt(DOF='z', t_lag=1)
    grad_dzdt = lin_op(grids['dz'], name='grad_dzdt').grad_dzdt(DOF='z', t_lag=1)
    constraint_op_list = [grad2_z0, grad2_dz, grad_dzdt]
    if 'd2z_dt2' in args['E_RMS'] and args['E_RMS']['d2z_dt2'] is not None:
        d2z_dt2 = lin_op(grids['dz'], name='d2z_dt2').d2z_dt2(DOF='z')
        constraint_op_list.append(d2z_dt2)

    # if bias params are given, create a set of parameters to estimate them
    if args['bias_params'] is not None:
        data, bias_model = assign_bias_ID(data, args['bias_params'])
        G_bias, Gc_bias, Cvals_bias, bias_model = param_bias_matrix(
            data, bias_model, bias_param_name='bias_ID', col_0=grids['dz'].col_N)
        G_data.add(G_bias)
        constraint_op_list.append(Gc_bias)

    # put the equations together
    Gc = lin_op(None, name='constraints').vstack(constraint_op_list)
    N_eq = G_data.N_eq + Gc.N_eq

    # put together all the errors
    Ec = np.zeros(Gc.N_eq)
    root_delta_V_dz = np.sqrt(np.prod(grids['dz'].delta))
    root_delta_A_z0 = np.sqrt(np.prod(grids['z0'].delta))
    Ec[Gc.TOC['rows']['grad2_z0']] = args['E_RMS']['d2z0_dx2'] / root_delta_A_z0 * grad2_z0.mask_for_ind0(args['mask_scale'])
    Ec[Gc.TOC['rows']['grad2_dzdt']] = args['E_RMS']['d3z_dx2dt'] / root_delta_V_dz * grad2_dz.mask_for_ind0(args['mask_scale'])
    Ec[Gc.TOC['rows']['grad_dzdt']] = args['E_RMS']['d2z_dxdt'] / root_delta_V_dz * grad_dzdt.mask_for_ind0(args['mask_scale'])
    if 'd2z_dt2' in args['E_RMS'] and args['E_RMS']['d2z_dt2'] is not None:
        Ec[Gc.TOC['rows']['d2z_dt2']] = args['E_RMS']['d2z_dt2'] / root_delta_V_dz
    if args['bias_params'] is not None:
        Ec[Gc.TOC['rows'][Gc_bias.name]] = Cvals_bias
    Ed = data.sigma.ravel()

    # calculate the inverse square root of the data covariance matrix
    TCinv = sp.dia_matrix((1. / np.concatenate((Ed, Ec)), 0), shape=(N_eq, N_eq))

    # define the right-hand side of the equation
    rhs = np.zeros([N_eq])
    rhs[0:data.size] = data.z.ravel()

    # put the fit and constraint matrices together
    Gcoo = sp.vstack([G_data.toCSR(), Gc.toCSR()]).tocoo()
    cov_rows = G_data.N_eq + np.arange(Gc.N_eq)

    # define the matrix that sets dz[reference_epoch]=0 by removing columns from the solution:
    # find the rows and columns that match the reference epoch
    temp_r, temp_c = np.meshgrid(np.arange(0, grids['dz'].shape[0]),
                                 np.arange(0, grids['dz'].shape[1]))
    z02_mask = grids['dz'].global_ind([temp_r.transpose().ravel(), temp_c.transpose().ravel(),
                                       args['reference_epoch'] + np.zeros_like(temp_r).ravel()])

    # identify all of the DOFs that do not include the reference epoch
    cols = np.arange(G_data.col_N, dtype='int')
    include_cols = np.setdiff1d(cols, z02_mask)

    # Generate a matrix that has diagonal elements corresponding to all DOFs except the reference epoch.
    # Multiplying this by a matrix with columns for all model parameters yields a matrix with no columns
    # corresponding to the reference epoch.
    Ip_c = sp.coo_matrix((np.ones_like(include_cols), (include_cols, np.arange(include_cols.size))),
                         shape=(Gc.col_N, include_cols.size)).tocsc()

    # eliminate the columns for the model variables that are set to zero
    Gcoo = Gcoo.dot(Ip_c)
    timing['setup'] = time() - tic

    if np.any(data.z > 2500):
        print('outlier!')

    # initialize the book-keeping matrices for the inversion
    m0 = np.zeros(Ip_c.shape[0])
    if "three_sigma_edit" in data.list_of_fields:
        inTSE = np.where(data.three_sigma_edit)[0]
    else:
        inTSE = np.arange(G_data.N_eq, dtype=int)
    if args['VERBOSE']:
        print("initial: %d:" % G_data.r.max())
    tic_iteration = time()
    for iteration in range(args['max_iterations']):
        # build the parsing matrix that removes invalid rows
        Ip_r = sp.coo_matrix((np.ones(Gc.N_eq + inTSE.size),
                              (np.arange(Gc.N_eq + inTSE.size), np.concatenate((inTSE, cov_rows)))),
                             shape=(Gc.N_eq + inTSE.size, Gcoo.shape[0])).tocsc()
        m0_last = m0
        if args['VERBOSE']:
            print("starting qr solve for iteration %d" % iteration)
        # solve the equations
        tic = time()
        m0 = Ip_c.dot(sparseqr.solve(Ip_r.dot(TCinv.dot(Gcoo)), Ip_r.dot(TCinv.dot(rhs))))
        timing['sparseqr_solve'] = time() - tic

        # quit if the solution is too similar to the previous solution
        if (np.max(np.abs((m0_last - m0)[Gc.TOC['cols']['dz']])) < 0.05) and (iteration > 2):
            break

        # calculate the full data residual
        rs_data = (data.z - G_data.toCSR().dot(m0)) / data.sigma
        # calculate the robust standard deviation of the scaled residuals for the selected data
        sigma_hat = RDE(rs_data[inTSE])
        inTSE_last = inTSE
        # select the data that are within 3*sigma of the solution
        inTSE = np.where(np.abs(rs_data) < 3.0 * np.maximum(1, sigma_hat))[0]
        if args['VERBOSE']:
            print('found %d in TSE, sigma_hat=%3.3f' % (inTSE.size, sigma_hat))
        if (sigma_hat <= 1 or (inTSE.size == inTSE_last.size and np.all(inTSE_last == inTSE))) and (iteration > 2):
            if args['VERBOSE']:
                print("sigma_hat LT 1, exiting")
            break
    timing['iteration'] = time() - tic_iteration
    inTSE = inTSE_last
    valid_data[valid_data] = (np.abs(rs_data) < 3.0 * np.maximum(1, sigma_hat))
    data.assign({'three_sigma_edit': np.abs(rs_data) < 3.0 * np.maximum(1, sigma_hat)})

    # report the model-based estimate of the data points
    data.assign({'z_est': np.reshape(G_data.toCSR().dot(m0), data.shape)})

    # reshape the components of m to the grid shapes
    m['z0'] = np.reshape(m0[Gc.TOC['cols']['z0']], grids['z0'].shape)
    m['dz'] = np.reshape(m0[Gc.TOC['cols']['dz']], grids['dz'].shape)

    # calculate height rates
    for lag in args['dzdt_lags']:
        this_name = 'dzdt_lag%d' % lag
        m[this_name] = lin_op(grids['dz'], name='dzdt', col_N=G_data.col_N).dzdt(lag=lag).grid_prod(m0)

    # build a matrix that takes the average of the central 20 km of the delta-z grid
    XR = np.mean(grids['z0'].bds[0]) + np.array([-1., 1.]) * args['W_ctr'] / 2.
    YR = np.mean(grids['z0'].bds[1]) + np.array([-1., 1.]) * args['W_ctr'] / 2.
    center_dzbar = lin_op(grids['dz'], name='center_dzbar', col_N=G_data.col_N).vstack(
        [lin_op(grids['dz']).mean_of_bounds((XR, YR, [season, season])) for season in grids['dz'].ctrs[2]])
    G_dzbar = center_dzbar.toCSR()

    # calculate the grid mean of dz
    m['dz_bar'] = G_dzbar.dot(m0)

    # build a matrix that takes the lagged temporal derivative of dzbar (e.g. quarterly dzdt, annual dzdt)
    for lag in args['dzdt_lags']:
        this_name = 'dzdt_bar_lag%d' % lag
        this_op = lin_op(grids['t'], name=this_name).diff(lag=lag).toCSR()
        # calculate the grid mean of dz/dt
        m[this_name] = this_op.dot(m['dz_bar'].ravel())

    # report the parameter biases, sorted in the order of the parameter bias arguments
    if args['bias_params'] is not None:
        m['bias'] = parse_biases(m0, bias_model['bias_ID_dict'], args['bias_params'])

    # report the entire model vector, just in case we want it
    m['all'] = m0

    # report the geolocation of the output map
    m['extent'] = np.concatenate((grids['z0'].bds[1], grids['z0'].bds[0]))

    # parse the residuals to assess the contributions to the total error:
    # make the C matrix for the constraints
    TCinv_cov = sp.dia_matrix((1. / Ec, 0), shape=(Gc.N_eq, Gc.N_eq))
    rc = TCinv_cov.dot(Gc.toCSR().dot(m0))
    ru = Gc.toCSR().dot(m0)
    R = dict()
    RMS = dict()
    for eq_type in ['d2z_dt2', 'grad2_z0', 'grad2_dzdt']:
        if eq_type in Gc.TOC['rows']:
            R[eq_type] = np.sum(rc[Gc.TOC['rows'][eq_type]]**2)
            RMS[eq_type] = np.sqrt(np.mean(ru[Gc.TOC['rows'][eq_type]]**2))
    R['data'] = np.sum(((data.z_est - data.z) / data.sigma)**2)
    RMS['data'] = np.sqrt(np.mean((data.z_est - data.z)**2))

    # if we need to compute the errors in the solution, continue
    if args['compute_E']:
        tic = time()
        # take the QZ transform of Gcoo
        z, R, perm, rank = sparseqr.qz(Ip_r.dot(TCinv.dot(Gcoo)), Ip_r.dot(TCinv.dot(rhs)))
        z = z.ravel()
        R = R.tocsr()
        R.sort_indices()
        R.eliminate_zeros()
        timing['decompose_qz'] = time() - tic
        E0 = np.zeros(R.shape[0])

        # compute Rinv for use in propagating errors.
        # What should the tolerance be?  We will eventually square Rinv and take its
        # row-wise sum.  We care about errors at the cm level, so
        # size(Rinv)*tol^2 = 0.01 -> tol = sqrt(0.01/size(Rinv)) ~ 1E-4
        tic = time()
        RR, CC, VV, status = inv_tr_upper(R, int(np.prod(R.shape) / 4), 1.e-5)
        # save Rinv as a sparse array.  The syntax perm[RR] undoes the permutation from QZ
        Rinv = sp.coo_matrix((VV, (perm[RR], CC)), shape=R.shape).tocsr()
        timing['Rinv_cython'] = time() - tic
        tic = time()
        E0 = np.sqrt(Rinv.power(2).sum(axis=1))
        timing['propagate_errors'] = time() - tic

        # generate the full E vector.  E0 appears to be an ndarray.
        E0 = np.array(Ip_c.dot(E0)).ravel()
        E['z0'] = np.reshape(E0[Gc.TOC['cols']['z0']], grids['z0'].shape)
        E['dz'] = np.reshape(E0[Gc.TOC['cols']['dz']], grids['dz'].shape)

        # generate the lagged dz errors:
        for lag in args['dzdt_lags']:
            this_name = 'dzdt_lag%d' % lag
            E[this_name] = lin_op(grids['dz'], name=this_name, col_N=G_data.col_N).dzdt(lag=lag).grid_error(Ip_c.dot(Rinv))
            this_name = 'dzdt_bar_lag%d' % lag
            this_op = lin_op(grids['t'], name=this_name).diff(lag=lag).toCSR()
            # calculate the error in the grid mean of dz/dt
            E[this_name] = np.sqrt((this_op.dot(Ip_c).dot(Rinv)).power(2).sum(axis=1))

        # generate the season-to-season errors
        #E['dzdt_qyr']=lin_op(grids['dz'], name='dzdt_1yr', col_N=G_data.col_N).dzdt().grid_error(Ip_c.dot(Rinv))

        # generate the annual errors
        #E['dzdt_1yr']=lin_op(grids['dz'], name='dzdt_1yr', col_N=G_data.col_N).dzdt(lag=4).grid_error(Ip_c.dot(Rinv))

        # generate the grid-mean error
        E['dz_bar'] = np.sqrt((G_dzbar.dot(Ip_c).dot(Rinv)).power(2).sum(axis=1))

        # generate the grid-mean quarterly dzdt error
        #E['dzdt_bar_qyr']=np.sqrt((ddt_qyr.dot(G_dzbar).dot(Ip_c).dot(Rinv)).power(2).sum(axis=1))

        # generate the grid-mean annual dzdt error
        #E['dzdt_bar_1yr']=np.sqrt((ddt_1yr.dot(G_dzbar).dot(Ip_c).dot(Rinv)).power(2).sum(axis=1))

        # report the rgt bias errors, sorted by RGT, then by cycle
        if args['bias_params'] is not None:
            E['bias'] = parse_biases(E0, bias_model['bias_ID_dict'], args['bias_params'])

    TOC = Gc.TOC
    return {'m': m, 'E': E, 'data': data, 'grids': grids, 'valid_data': valid_data,
            'TOC': TOC, 'R': R, 'RMS': RMS, 'timing': timing, 'E_RMS': args['E_RMS']}
from LSsurf.fd_grid import fd_grid
from LSsurf.lin_op import lin_op
import scipy.sparse as sp
import matplotlib.pyplot as plt
import numpy as np
from LSsurf.smooth_xytb_fit import sum_cell_area
from LSsurf.smooth_xytb_fit import calc_cell_area
from LSsurf.smooth_xytb_fit import setup_averaging_ops

xc = np.array([0, -5.e5])
deltas = [100., 100.]
bounds = [xc[1] + np.array([-3.e4, 3.e4]), xc[0] + np.array([-3.e4, 3.e4])]
srs_proj4 = '+proj=stere +lat_0=90 +lat_ts=70 +lon_0=-45 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs'

grid_z0 = fd_grid(bounds, deltas, srs_proj4=srs_proj4)
grid_dz = fd_grid(bounds + [np.array([0, 5])], [1.e3, 1.e3, 0.25])
grid_10km = fd_grid(bounds + [np.array([0, 5])], [1.e4, 1.e4, 0.25])

mask = np.zeros(grid_z0.ctrs[0].size * np.array([1, 1]))
mask[:, grid_z0.ctrs[1] < np.mean(grid_z0.ctrs[1]) - 150] = 1
grid_z0.mask = mask

cell_area_0 = calc_cell_area(grid_z0)
cell_area_1, op = sum_cell_area(grid_z0, grid_dz, return_op=True)

args = {'avg_scales': [1.e4], 'dzdt_lags': [1, 4]}
ops = setup_averaging_ops(grid_dz, grid_dz.col_N, args, cell_area=cell_area_1)
def smooth_xyt_fit(**kwargs):
    required_fields = ('data', 'W', 'ctr', 'spacing', 'E_RMS')
    args = {'reference_epoch': 0,
            'W_ctr': 1e4,
            'mask_file': None,
            'mask_scale': None,
            'compute_E': False,
            'max_iterations': 10,
            'srs_proj4': None,
            'N_subset': None,
            'bias_params': None,
            'repeat_res': None,
            'converge_tol_dz': 0.05,
            'repeat_dt': 1,
            'Edit_only': False,
            'dzdt_lags': [1, 4],
            'data_slope_sensors': None,
            'E_slope': 0.05,
            'VERBOSE': True}
    args.update(kwargs)
    for field in required_fields:
        if field not in kwargs:
            raise ValueError("%s must be defined", field)
    valid_data = np.isfinite(args['data'].z) & np.isfinite(args['data'].sigma)
    timing = dict()

    if args['N_subset'] is not None:
        tic = time()
        valid_data &= edit_data_by_subset_fit(args['N_subset'], args)
        timing['edit_by_subset'] = time() - tic
        if args['Edit_only']:
            return {'timing': timing, 'data': args['data'].copy()[valid_data]}
    m = {}
    E = {}
    R = {}
    RMS = {}

    # define the grids
    tic = time()
    bds = {coord: args['ctr'][coord] + np.array([-0.5, 0.5]) * args['W'][coord]
           for coord in ('x', 'y', 't')}
    grids = dict()
    grids['z0'] = fd_grid([bds['y'], bds['x']], args['spacing']['z0'] * np.ones(2),
                          name='z0', srs_proj4=args['srs_proj4'], mask_file=args['mask_file'])
    grids['dz'] = fd_grid([bds['y'], bds['x'], bds['t']],
                          [args['spacing']['dz'], args['spacing']['dz'], args['spacing']['dt']],
                          name='dz', col_0=grids['z0'].N_nodes, srs_proj4=args['srs_proj4'],
                          mask_file=args['mask_file'])
    grids['z0'].col_N = grids['dz'].col_N
    grids['t'] = fd_grid([bds['t']], [args['spacing']['dt']], name='t')

    # select only the data points that are within the grid bounds
    valid_z0 = grids['z0'].validate_pts((args['data'].coords()[0:2]))
    valid_dz = grids['dz'].validate_pts((args['data'].coords()))
    valid_data = valid_data & valid_dz & valid_z0

    if not np.any(valid_data):
        return {'m': m, 'E': E, 'data': None, 'grids': grids, 'valid_data': valid_data,
                'TOC': {}, 'R': {}, 'RMS': {}, 'timing': timing, 'E_RMS': args['E_RMS']}

    # if repeat_res is given, resample the data to include only repeat data
    # (to within a spatial tolerance of repeat_res)
    if args['repeat_res'] is not None:
        N_before_repeat = np.sum(valid_data)
        valid_data[valid_data] = valid_data[valid_data] & \
            select_repeat_data(args['data'].copy_subset(valid_data), grids,
                               args['repeat_dt'], args['repeat_res'],
                               reference_time=grids['t'].ctrs[0][args['reference_epoch']])
        if args['VERBOSE']:
            print("before repeat editing found %d data" % N_before_repeat)
            print("after repeat editing found %d data" % valid_data.sum())

    # subset the data based on the valid mask
    data = args['data'].copy_subset(valid_data)

    # if we have a mask file, use it to subset the data
    # needs to be done after the valid subset because otherwise the interp_mtx for the mask file fails.
    if args['mask_file'] is not None:
        temp = fd_grid([bds['y'], bds['x']], [args['spacing']['z0'], args['spacing']['z0']],
                       name='z0', srs_proj4=args['srs_proj4'], mask_file=args['mask_file'])
        data_mask = lin_op(temp, name='interp_z').interp_mtx(data.coords()[0:2]).toCSR().dot(grids['z0'].mask.ravel())
        data_mask[~np.isfinite(data_mask)] = 0
        if np.any(data_mask == 0):
            data.index(~(data_mask == 0))
            valid_data[valid_data] = ~(data_mask == 0)

    # check if we have any data; if not, quit
    if data.size == 0:
        return {'m': m, 'E': E, 'data': data, 'grids': grids, 'valid_data': valid_data,
                'TOC': {}, 'R': {}, 'RMS': {}, 'timing': timing, 'E_RMS': args['E_RMS']}

    # define the interpolation operator, equal to the sum of the dz and z0 operators
    G_data = lin_op(grids['z0'], name='interp_z').interp_mtx(data.coords()[0:2])
    G_data.add(lin_op(grids['dz'], name='interp_dz').interp_mtx(data.coords()))

    # define the smoothness constraints
    grad2_z0 = lin_op(grids['z0'], name='grad2_z0').grad2(DOF='z0')
    grad2_dz = lin_op(grids['dz'], name='grad2_dzdt').grad2_dzdt(DOF='z', t_lag=1)
    grad_dzdt = lin_op(grids['dz'], name='grad_dzdt').grad_dzdt(DOF='z', t_lag=1)
    constraint_op_list = [grad2_z0, grad2_dz, grad_dzdt]
    if 'd2z_dt2' in args['E_RMS'] and args['E_RMS']['d2z_dt2'] is not None:
        d2z_dt2 = lin_op(grids['dz'], name='d2z_dt2').d2z_dt2(DOF='z')
        constraint_op_list.append(d2z_dt2)

    # if bias params are given, create a set of parameters to estimate them
    if args['bias_params'] is not None:
        data, bias_model = assign_bias_ID(data, args['bias_params'])
        G_bias, Gc_bias, Cvals_bias, bias_model = \
            param_bias_matrix(data, bias_model, bias_param_name='bias_ID', col_0=grids['dz'].col_N)
        G_data.add(G_bias)
        constraint_op_list.append(Gc_bias)

    if args['data_slope_sensors'] is not None:
        bias_model['E_slope'] = args['E_slope']
        G_slope_bias, Gc_slope_bias, Cvals_slope_bias, bias_model = data_slope_bias(
            data, bias_model, sensors=args['data_slope_sensors'], col_0=G_data.col_N)
        G_data.add(G_slope_bias)
        constraint_op_list.append(Gc_slope_bias)

    # put the equations together
    Gc = lin_op(None, name='constraints').vstack(constraint_op_list)
    N_eq = G_data.N_eq + Gc.N_eq

    # put together all the errors
    Ec = np.zeros(Gc.N_eq)
    root_delta_V_dz = np.sqrt(np.prod(grids['dz'].delta))
    root_delta_A_z0 = np.sqrt(np.prod(grids['z0'].delta))
    Ec[Gc.TOC['rows']['grad2_z0']] = args['E_RMS']['d2z0_dx2'] / root_delta_A_z0 * grad2_z0.mask_for_ind0(args['mask_scale'])
    Ec[Gc.TOC['rows']['grad2_dzdt']] = args['E_RMS']['d3z_dx2dt'] / root_delta_V_dz * grad2_dz.mask_for_ind0(args['mask_scale'])
    Ec[Gc.TOC['rows']['grad_dzdt']] = args['E_RMS']['d2z_dxdt'] / root_delta_V_dz * grad_dzdt.mask_for_ind0(args['mask_scale'])
    if 'd2z_dt2' in args['E_RMS'] and args['E_RMS']['d2z_dt2'] is not None:
        Ec[Gc.TOC['rows']['d2z_dt2']] = args['E_RMS']['d2z_dt2'] / root_delta_V_dz
    if args['bias_params'] is not None:
        Ec[Gc.TOC['rows'][Gc_bias.name]] = Cvals_bias
    if args['data_slope_sensors'] is not None:
        Ec[Gc.TOC['rows'][Gc_slope_bias.name]] = Cvals_slope_bias
    Ed = data.sigma.ravel()

    # calculate the inverse square root of the data covariance matrix
    TCinv = sp.dia_matrix((1. / np.concatenate((Ed, Ec)), 0), shape=(N_eq, N_eq))

    # define the right-hand side of the equation
    rhs = np.zeros([N_eq])
    rhs[0:data.size] = data.z.ravel()

    # put the fit and constraint matrices together
    Gcoo = sp.vstack([G_data.toCSR(), Gc.toCSR()]).tocoo()
    cov_rows = G_data.N_eq + np.arange(Gc.N_eq)

    # build a matrix that takes the average of the center of the delta-z grid
    # this gets used both in the averaging and error-calculation codes
    XR = np.mean(grids['z0'].bds[0]) + np.array([-1., 1.]) * args['W_ctr'] / 2.
    YR = np.mean(grids['z0'].bds[1]) + np.array([-1., 1.]) * args['W_ctr'] / 2.
    center_dzbar = lin_op(grids['dz'], name='center_dzbar', col_N=G_data.col_N).vstack(
        [lin_op(grids['dz']).mean_of_bounds((XR, YR, [season, season])) for season in grids['dz'].ctrs[2]])
    G_dzbar = center_dzbar.toCSR()

    # define the matrix that sets dz[reference_epoch]=0 by removing columns from the solution:
    # find the rows and columns that match the reference epoch
    temp_r, temp_c = np.meshgrid(np.arange(0, grids['dz'].shape[0]),
                                 np.arange(0, grids['dz'].shape[1]))
    z02_mask = grids['dz'].global_ind([temp_r.transpose().ravel(), temp_c.transpose().ravel(),
                                       args['reference_epoch'] + np.zeros_like(temp_r).ravel()])

    # identify all of the DOFs that do not include the reference epoch
    cols = np.arange(G_data.col_N, dtype='int')
    include_cols = np.setdiff1d(cols, z02_mask)

    # Generate a matrix that has diagonal elements corresponding to all DOFs except the reference epoch.
    # Multiplying this by a matrix with columns for all model parameters yields a matrix with no columns
    # corresponding to the reference epoch.
    Ip_c = sp.coo_matrix((np.ones_like(include_cols), (include_cols, np.arange(include_cols.size))),
                         shape=(Gc.col_N, include_cols.size)).tocsc()

    # eliminate the columns for the model variables that are set to zero
    Gcoo = Gcoo.dot(Ip_c)
    timing['setup'] = time() - tic

    # initialize the book-keeping matrices for the inversion
    m0 = np.zeros(Ip_c.shape[0])
    if "three_sigma_edit" in data.fields:
        inTSE = np.flatnonzero(data.three_sigma_edit)
    else:
        inTSE = np.arange(G_data.N_eq, dtype=int)
    inTSE_last = np.zeros([0])
    if args['VERBOSE']:
        print("initial: %d:" % G_data.r.max())
    tic_iteration = time()
    for iteration in range(args['max_iterations']):
        # build the parsing matrix that removes invalid rows
        Ip_r = sp.coo_matrix((np.ones(Gc.N_eq + inTSE.size),
                              (np.arange(Gc.N_eq + inTSE.size), np.concatenate((inTSE, cov_rows)))),
                             shape=(Gc.N_eq + inTSE.size, Gcoo.shape[0])).tocsc()
        m0_last = m0
        if args['VERBOSE']:
            print("starting qr solve for iteration %d" % iteration)
        # solve the equations
        tic = time()
        m0 = Ip_c.dot(sparseqr.solve(Ip_r.dot(TCinv.dot(Gcoo)), Ip_r.dot(TCinv.dot(rhs))))
        timing['sparseqr_solve'] = time() - tic

        # calculate the full data residual
        rs_data = (data.z - G_data.toCSR().dot(m0)) / data.sigma
        # calculate the robust standard deviation of the scaled residuals for the selected data
        sigma_hat = RDE(rs_data[inTSE])

        # select the data that have scaled residuals < 3 * max(1, sigma_hat)
        inTSE_last = inTSE
        inTSE = np.flatnonzero(np.abs(rs_data) < 3.0 * np.maximum(1, sigma_hat))

        # quit if the solution is too similar to the previous solution
        if (np.max(np.abs((m0_last - m0)[Gc.TOC['cols']['dz']])) < args['converge_tol_dz']) and (iteration > 2):
            if args['VERBOSE']:
                print("Solution identical to previous iteration with tolerance %3.1f, exiting after iteration %d"
                      % (args['converge_tol_dz'], iteration))
            break
        # quit if the set of selected data has converged
        if args['VERBOSE']:
            print('found %d in TSE, sigma_hat=%3.3f' % (inTSE.size, sigma_hat))
        if iteration > 0:
            if inTSE.size == inTSE_last.size and np.all(inTSE_last == inTSE):
                if args['VERBOSE']:
                    print("filtering unchanged, exiting after iteration %d" % iteration)
                break
        if iteration >= 2:
            if sigma_hat <= 1:
                if args['VERBOSE']:
                    print("sigma_hat LT 1, exiting after iteration %d" % iteration)
                break

    # if we've done any iterations, parse the model and the data residuals
    if args['max_iterations'] > 0:
        timing['iteration'] = time() - tic_iteration
        inTSE = inTSE_last
        valid_data[valid_data] = (np.abs(rs_data) < 3.0 * np.maximum(1, sigma_hat))
        data.assign({'three_sigma_edit': np.abs(rs_data) < 3.0 * np.maximum(1, sigma_hat)})

        # report the model-based estimate of the data points
        data.assign({'z_est': np.reshape(G_data.toCSR().dot(m0), data.shape)})
        parse_model(m, m0, G_data, G_dzbar, Gc.TOC, grids, args['bias_params'], bias_model,
                    dzdt_lags=args['dzdt_lags'])

        # parse the residuals to assess the contributions to the total error:
        # make the C matrix for the constraints
        TCinv_cov = sp.dia_matrix((1. / Ec, 0), shape=(Gc.N_eq, Gc.N_eq))
        rc = TCinv_cov.dot(Gc.toCSR().dot(m0))
        ru = Gc.toCSR().dot(m0)
        for eq_type in ['d2z_dt2', 'grad2_z0', 'grad2_dzdt']:
            if eq_type in Gc.TOC['rows']:
                R[eq_type] = np.sum(rc[Gc.TOC['rows'][eq_type]]**2)
                RMS[eq_type] = np.sqrt(np.mean(ru[Gc.TOC['rows'][eq_type]]**2))
        R['data'] = np.sum(((data.z_est[data.three_sigma_edit == 1] - data.z[data.three_sigma_edit == 1]) /
                            data.sigma[data.three_sigma_edit == 1])**2)
        RMS['data'] = np.sqrt(np.mean((data.z_est[data.three_sigma_edit == 1] -
                                       data.z[data.three_sigma_edit == 1])**2))

    # compute the error in the solution if requested
    if args['compute_E']:
        # we have generally not done any iterations at this point, so we need to make the Ip_r matrix
        Ip_r = sp.coo_matrix((np.ones(Gc.N_eq + inTSE.size),
                              (np.arange(Gc.N_eq + inTSE.size), np.concatenate((inTSE, cov_rows)))),
                             shape=(Gc.N_eq + inTSE.size, Gcoo.shape[0])).tocsc()
        parse_errors(E, Gcoo, TCinv, rhs, Ip_c, Ip_r, grids, G_data, Gc, G_dzbar,
                     bias_model, args['bias_params'], dzdt_lags=args['dzdt_lags'], timing=timing)

    TOC = Gc.TOC
    return {'m': m, 'E': E, 'data': data, 'grids': grids, 'valid_data': valid_data,
            'TOC': TOC, 'R': R, 'RMS': RMS, 'timing': timing, 'E_RMS': args['E_RMS'],
            'dzdt_lags': args['dzdt_lags']}
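# Hypothetical invocation sketch, not from the library: it assumes 'D' is an existing
# pointCollection-like data object with x, y, z, sigma, and time fields (glas_fit below shows how
# such an object can be assembled from a geo_index query).  The E_RMS, W, ctr, and spacing values
# mirror those used in glas_fit and are illustrative only.
def _example_smooth_xyt_fit(D):
    E_RMS = {'d2z0_dx2': 20000. / 3000 / 3000, 'd3z_dx2dt': 10. / 3000 / 3000,
             'd2z_dxdt': 100 / 3000, 'd2z_dt2': 1}
    S = smooth_xyt_fit(data=D,
                       W={'x': 4.e4, 'y': 4.e4, 't': 6},
                       ctr={'x': -1.5e5, 'y': -2.e6, 't': 2006.0},
                       spacing={'z0': 5.e2, 'dz': 5.e3, 'dt': 0.25},
                       E_RMS=E_RMS,
                       srs_proj4='+proj=stere +lat_0=90 +lat_ts=70 +lon_0=-45 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs',
                       max_iterations=10,
                       compute_E=False,
                       VERBOSE=True)
    # S['m'] holds the parsed model (e.g. the z0 surface and dz fields), S['data'] the edited data
    # with 'three_sigma_edit' and 'z_est' assigned, and S['R']/S['RMS'] the residual summaries.
    return S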
def glas_fit(W0, xy0=np.array((-150000, -2000000)), D=None, E_RMS=None, gI=None,
             giFile='/Data/glas/GL/rel_634/GeoIndex.h5'):
    # W0 (the domain width) has no default, so it is listed before the keyword arguments
    if gI is None:
        gI = geo_index().from_file(giFile)
    #import_D=False; print("WARNING::::::REUSING D")
    timing = dict()
    xy0 = np.array((-150000, -2000000))
    E_RMS = {'d2z0_dx2': 20000. / 3000 / 3000, 'd3z_dx2dt': 10. / 3000 / 3000,
             'd2z_dxdt': 100 / 3000, 'd2z_dt2': 1}
    W = {'x': W0, 'y': W0, 't': 6}
    spacing = {'z0': 5.e2, 'dzdt': 5.e3}
    ctr = {'x': xy0[0], 'y': xy0[1], 't': (2003 + 2009) / 2.}
    args = {'W': W, 'ctr': ctr, 'spacing': spacing, 'E_RMS': E_RMS, 'max_iterations': 25}
    if D is None:
        fields = ['IceSVar', 'deltaEllip', 'numPk', 'ocElv', 'reflctUC', 'satElevCorr',
                  'time', 'x', 'y', 'z']
        D = gI.query_xy_box(xy0[0] + np.array([-W['x'] / 2, W['x'] / 2]),
                            xy0[1] + np.array([-W['y'] / 2, W['y'] / 2]), fields=fields)
        #plt.plot(xy[0], xy[1],'.')
        #plt.plot(xy0[0], xy0[1],'r*')
    D.assign({'year': matlabToYear(D.time)})
    good = (D.IceSVar < 0.035) & (D.reflctUC > 0.05) & (D.satElevCorr < 1) & (D.numPk == 1)
    D.subset(good, datasets=['x', 'y', 'z', 'year'])
    D.assign({'sigma': np.zeros_like(D.x) + 0.2, 'time': D.year})
    plt.plot(D.x, D.y, 'm.')

    bds = {coord: args['ctr'][coord] + np.array([-0.5, 0.5]) * args['W'][coord] for coord in ('x', 'y')}
    grids = dict()
    grids['z0'] = fd_grid([bds['y'], bds['x']], args['spacing']['z0'] * np.ones(2), name='z0')
    grids['dzdt'] = fd_grid([bds['y'], bds['x']], args['spacing']['dzdt'] * np.ones(2),
                            col_0=grids['z0'].col_N + 1, name='dzdt')
    valid_z0 = grids['z0'].validate_pts((D.coords()[0:2]))
    valid_dz = grids['dzdt'].validate_pts((D.coords()))
    valid_data = valid_dz & valid_z0
    D = D.subset(valid_data)

    G_data = lin_op(grids['z0'], name='interp_z').interp_mtx(D.coords()[0:2])
    G_dzdt = lin_op(grids['dzdt'], name='dzdt').interp_mtx(D.coords()[0:2])
    G_dzdt.v *= (D.year[G_dzdt.r.astype(int)] - ctr['t'])
    G_data.add(G_dzdt)

    grad2_z0 = lin_op(grids['z0'], name='grad2_z0').grad2(DOF='z0')
    grad_z0 = lin_op(grids['z0'], name='grad_z0').grad(DOF='z0')
    grad2_dzdt = lin_op(grids['dzdt'], name='grad2_dzdt').grad2(DOF='dzdt')
    grad_dzdt = lin_op(grids['dzdt'], name='grad_dzdt').grad2(DOF='dzdt')
    Gc = lin_op(None, name='constraints').vstack((grad2_z0, grad_z0, grad2_dzdt, grad_dzdt))

    Ec = np.zeros(Gc.N_eq)
    root_delta_A_z0 = np.sqrt(np.prod(grids['z0'].delta))
    Ec[Gc.TOC['rows']['grad2_z0']] = args['E_RMS']['d2z0_dx2'] / root_delta_A_z0
    Ec[Gc.TOC['rows']['grad2_dzdt']] = args['E_RMS']['d3z_dx2dt'] / root_delta_A_z0
    Ec[Gc.TOC['rows']['grad_z0']] = 1.e4 * args['E_RMS']['d2z0_dx2'] / root_delta_A_z0
    Ec[Gc.TOC['rows']['grad_dzdt']] = 1.e4 * args['E_RMS']['d3z_dx2dt'] / root_delta_A_z0
    Ed = D.sigma.ravel()
    N_eq = G_data.N_eq + Gc.N_eq

    # calculate the inverse square root of the data covariance matrix
    TCinv = sp.dia_matrix((1. / np.concatenate((Ed, Ec)), 0), shape=(N_eq, N_eq))

    # define the right-hand side of the equation
    rhs = np.zeros([N_eq])
    rhs[0:D.x.size] = D.z.ravel()

    # put the fit and constraint matrices together
    Gcoo = sp.vstack([G_data.toCSR(), Gc.toCSR()]).tocoo()
    cov_rows = G_data.N_eq + np.arange(Gc.N_eq)

    # initialize the book-keeping matrices for the inversion
    m0 = np.zeros(Gcoo.shape[1])
    inTSE = np.arange(G_data.N_eq, dtype=int)
    for iteration in range(args['max_iterations']):
        # build the parsing matrix that removes invalid rows
        Ip_r = sp.coo_matrix((np.ones(Gc.N_eq + inTSE.size),
                              (np.arange(Gc.N_eq + inTSE.size), np.concatenate((inTSE, cov_rows)))),
                             shape=(Gc.N_eq + inTSE.size, Gcoo.shape[0])).tocsc()
        m0_last = m0
        # solve the equations
        tic = time()
        m0 = sparseqr.solve(Ip_r.dot(TCinv.dot(Gcoo)), Ip_r.dot(TCinv.dot(rhs)))
        timing['sparseqr_solve'] = time() - tic
        # quit if the solution is too similar to the previous solution
        if np.max(np.abs((m0_last - m0)[Gc.TOC['cols']['dzdt']])) < 0.05:
            break
        # calculate the full data residual
        rs_data = (D.z - G_data.toCSR().dot(m0)) / D.sigma
        # calculate the robust standard deviation of the scaled residuals for the selected data
        sigma_hat = RDE(rs_data[inTSE])
        inTSE_last = inTSE
        # select the data that are within 3*sigma of the solution
        inTSE = np.where(np.abs(rs_data) < 3.0 * sigma_hat)[0]
        print('found %d in TSE, sigma_hat=%3.3f' % (inTSE.size, sigma_hat))
        if sigma_hat <= 1 or (inTSE.size == inTSE_last.size and np.all(inTSE_last == inTSE)):
            break

    m = dict()
    m['z0'] = m0[Gc.TOC['cols']['z0']].reshape(grids['z0'].shape)
    m['dzdt'] = m0[Gc.TOC['cols']['dzdt']].reshape(grids['dzdt'].shape)

    if DOPLOT:
        plt.subplot(121)
        plt.imshow(m['z0'])
        plt.colorbar()
        plt.subplot(122)
        plt.imshow(m['dzdt'])
        plt.colorbar()
    if False:
        plt.figure()
        Dfinal = D.subset(inTSE)
        ii = np.argsort(Dfinal.z)
        plt.scatter(Dfinal.x[ii], Dfinal.y[ii], c=Dfinal.z[ii])
        plt.colorbar()
    return grids, m, D, inTSE, sigma_hat