# NOTE: these functions are collected from several MintPy modules; the imports
# below cover the stdlib/NumPy/SciPy dependencies, while MintPy-internal names
# (readfile, writefile, ptime, time_func, ut, pp, subset, cluster, ts2vel,
# timeseries, giantTimeseries, HDFEOS, dataType, configKeys, key_prefix, vprint,
# plot_ts_scatter, plot_ts_errorbar, read_exclude_date, read_inps2model,
# model2hdf5_dataset, ...) are assumed to be provided by their home modules.
import os
import datetime as dt

import numpy as np
from scipy import linalg


def estimate_S1AB_bias(mintpy_dir, dates, ts_dis):
    """Estimate the bias between Sentinel-1 A and B.
    Parameters: mintpy_dir - str, path of the mintpy working directory
                dates      - list of datetime.datetime objects
                ts_dis     - 2D np.ndarray in size of (num_date, num_pixel) in float32
    Returns:    bias       - 1D np.ndarray in size of (num_pixel) in float32
                flagA/B    - 1D np.ndarray in size of (num_date) in bool
                dates_fit  - list of datetime.datetime objects
                ts_fitA/B  - 1D np.ndarray in size of (num_date_fit) in float32
    """
    num_date = len(dates)
    ts_dis = ts_dis.reshape(num_date, -1)

    # dates/flags for S1A/B
    date_listA = np.loadtxt(os.path.join(mintpy_dir, 'S1A_date.txt'), dtype=str).tolist()
    date_listB = np.loadtxt(os.path.join(mintpy_dir, 'S1B_date.txt'), dtype=str).tolist()
    date_list = sorted(date_listA + date_listB)
    min_date = date_listB[0]
    flagA = np.array([x in date_listA and x >= min_date for x in date_list], dtype=np.bool_)
    flagB = np.array([x in date_listB and x >= min_date for x in date_list], dtype=np.bool_)

    # update date_list to the shared time period only
    date_listA = np.array(date_list)[flagA].tolist()
    date_listB = np.array(date_list)[flagB].tolist()

    # fit
    model = dict(polynomial=1)
    mA = time_func.estimate_time_func(model, date_listA, ts_dis[flagA, :], ref_date=date_listA[0])[1]
    mB = time_func.estimate_time_func(model, date_listB, ts_dis[flagB, :], ref_date=date_listB[0])[1]

    # grab bias/offset from the fitted time-series
    date_list_fit = ptime.get_date_range(min_date, date_list[-1], dstep=1)
    dates_fit = ptime.date_list2vector(date_list_fit)[0]
    GA_fit = time_func.get_design_matrix4time_func(date_list_fit, model, ref_date=date_listA[0])
    GB_fit = time_func.get_design_matrix4time_func(date_list_fit, model, ref_date=date_listB[0])
    ts_fitA = np.matmul(GA_fit, mA)
    ts_fitB = np.matmul(GB_fit, mB)
    bias = np.median(ts_fitB - ts_fitA, axis=0)

    return bias, flagA, flagB, dates_fit, ts_fitA, ts_fitB
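
# Usage sketch for estimate_S1AB_bias() (hypothetical inputs): `mintpy_dir` must
# contain S1A_date.txt and S1B_date.txt with one YYYYMMDD date per line, and
# `ts_dis` holds one displacement row per acquisition, in meters:
#
#   dates = ptime.date_list2vector(date_list)[0]
#   bias = estimate_S1AB_bias('./mintpy', dates, ts_dis)[0]
#   print('median S1A/B bias: {:.3f} mm'.format(np.nanmedian(bias) * 1000))
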
def get_gps_los_velocity(self, geom_obj, start_date=None, end_date=None,
                         ref_site=None, gps_comp='enu2los', horz_az_angle=-90.):
    dates, dis = self.read_gps_los_displacement(
        geom_obj,
        start_date=start_date,
        end_date=end_date,
        ref_site=ref_site,
        gps_comp=gps_comp,
        horz_az_angle=horz_az_angle)[:2]

    # displacement -> velocity
    # if:
    # 1. num of observations > 2 AND
    # 2. time overlap > 1/4
    dis2vel = True
    if len(dates) <= 2:
        dis2vel = False
    elif start_date and end_date:
        t0 = ptime.date_list2vector([start_date])[0][0]
        t1 = ptime.date_list2vector([end_date])[0][0]
        if dates[-1] - dates[0] <= (t1 - t0) / 4:
            dis2vel = False

    if dis2vel:
        date_list = [dt.datetime.strftime(i, '%Y%m%d') for i in dates]
        A = time_func.get_design_matrix4time_func(date_list)
        self.velocity = np.dot(np.linalg.pinv(A), dis)[1]
    else:
        self.velocity = np.nan

    return self.velocity, dis
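
# Usage sketch (hypothetical site name and file path; the exact constructor of
# the GPS-like class this method belongs to may differ between versions):
#
#   gps_obj = GPS(site='CACT')
#   vel, dis = gps_obj.get_gps_los_velocity(
#       geom_obj='./inputs/geometryRadar.h5',
#       start_date='20150101',
#       end_date='20200101',
#       gps_comp='enu2los')
#   print('LOS velocity: {:.4f} m/yr'.format(vel))
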
def timeseries2velocity(date_list, defo_list):
    # date_list --> design_matrix
    A = time_func.get_design_matrix4time_func(date_list)
    A_inv = np.linalg.pinv(A)

    # least squares inversion
    defo = np.array(defo_list, np.float32).reshape(-1, 1)
    vel = np.dot(A_inv, defo)[1, :]

    return vel
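
# A minimal, self-contained check for timeseries2velocity() (synthetic data;
# assumes the default design matrix is a linear polynomial in units of years,
# as the [1, :] indexing above implies): a linear subsidence of -10 mm/yr
# sampled every 12 days should be recovered as vel ~= -0.01 m/yr.
def _demo_timeseries2velocity():
    date0 = dt.datetime(2020, 1, 1)
    date_list = [(date0 + dt.timedelta(days=12 * i)).strftime('%Y%m%d') for i in range(30)]
    # -10 mm/yr of linear deformation, expressed in meters
    defo_list = [-0.01 * (12 * i) / 365.25 for i in range(30)]
    vel = timeseries2velocity(date_list, defo_list)
    print('estimated velocity: {:.4f} m/yr'.format(vel[0]))  # ~ -0.0100
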
def get_design_matrix4defo(inps):
    """Get the design matrix for ground surface deformation
    Parameters: inps   - namespace
    Returns:    G_defo - 2D np.ndarray in float32 in size of [num_date, num_param]
    """
    # key msg
    msg = '-' * 80
    msg += '\ncorrect topographic phase residual (DEM error) (Fattahi & Amelung, 2013, IEEE-TGRS)'
    msg += '\nordinary least squares (OLS) inversion with L2-norm minimization on: phase'
    if inps.phaseVelocity:
        msg += ' velocity'
    msg += "\ntemporal deformation model: polynomial order = {}".format(inps.polyOrder)
    if inps.stepFuncDate:
        msg += "\ntemporal deformation model: step functions at {}".format(inps.stepFuncDate)
    if inps.periodic:
        msg += "\ntemporal deformation model: periodic functions of {} yr".format(inps.periodic)
    msg += '\n' + '-' * 80
    print(msg)

    # prepare temporal deformation model
    model = dict()
    model['polynomial'] = inps.polyOrder
    model['step'] = inps.stepFuncDate
    model['periodic'] = inps.periodic

    # prepare SAR info
    ts_obj = timeseries(inps.timeseries_file)
    date_list = ts_obj.get_date_list()
    seconds = ts_obj.get_metadata().get('CENTER_LINE_UTC', 0)

    # compose design matrix
    G_defo = time_func.get_design_matrix4time_func(date_list, model, seconds=seconds)

    return G_defo
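
# Usage sketch for get_design_matrix4defo() (hypothetical attribute values;
# the function only reads the inps attributes shown here):
#
#   from argparse import Namespace
#   inps = Namespace(timeseries_file='timeseries.h5', phaseVelocity=False,
#                    polyOrder=2, stepFuncDate=[], periodic=[1.0, 0.5])
#   G_defo = get_design_matrix4defo(inps)   # shape: (num_date, num_param)
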
def read_init_info(inps):
    # Time Series Info
    atr = readfile.read_attribute(inps.file[0])
    inps.key = atr['FILE_TYPE']
    if inps.key == 'timeseries':
        obj = timeseries(inps.file[0])
    elif inps.key == 'giantTimeseries':
        obj = giantTimeseries(inps.file[0])
    elif inps.key == 'HDFEOS':
        obj = HDFEOS(inps.file[0])
    else:
        raise ValueError('input file is {}, not timeseries.'.format(inps.key))
    obj.open(print_msg=inps.print_msg)
    inps.seconds = atr.get('CENTER_LINE_UTC', 0)

    if not inps.file_label:
        inps.file_label = []
        for fname in inps.file:
            fbase = os.path.splitext(os.path.basename(fname))[0]
            fbase = fbase.replace('timeseries', '')
            inps.file_label.append(fbase)

    # default mask file
    if not inps.mask_file and 'msk' not in inps.file[0]:
        dir_name = os.path.dirname(inps.file[0])
        if 'Y_FIRST' in atr.keys():
            inps.mask_file = os.path.join(dir_name, 'geo_maskTempCoh.h5')
        else:
            inps.mask_file = os.path.join(dir_name, 'maskTempCoh.h5')
        if not os.path.isfile(inps.mask_file):
            inps.mask_file = None

    ## date info
    inps.date_list = obj.dateList
    inps.num_date = len(inps.date_list)
    if inps.start_date:
        inps.date_list = [i for i in inps.date_list if int(i) >= int(inps.start_date)]
    if inps.end_date:
        inps.date_list = [i for i in inps.date_list if int(i) <= int(inps.end_date)]
    inps.num_date = len(inps.date_list)
    inps.dates, inps.yearList = ptime.date_list2vector(inps.date_list)
    (inps.ex_date_list,
     inps.ex_dates,
     inps.ex_flag) = read_exclude_date(inps.ex_date_list, inps.date_list)

    # reference date/index
    if not inps.ref_date:
        inps.ref_date = atr.get('REF_DATE', None)
    if inps.ref_date:
        inps.ref_idx = inps.date_list.index(inps.ref_date)
    else:
        inps.ref_idx = None

    # date/index of interest for initial display
    if not inps.idx:
        if (not inps.ref_idx) or (inps.ref_idx < inps.num_date / 2.):
            inps.idx = inps.num_date - 2
        else:
            inps.idx = 2

    # Display Unit
    (inps.disp_unit,
     inps.unit_fac) = pp.scale_data2disp_unit(metadata=atr, disp_unit=inps.disp_unit)[1:3]

    # Map info - coordinate unit
    inps.coord_unit = atr.get('Y_UNIT', 'degrees').lower()

    # Read Error List
    inps.ts_plot_func = plot_ts_scatter
    inps.error_ts = None
    inps.ex_error_ts = None
    if inps.error_file:
        # assign plot function
        inps.ts_plot_func = plot_ts_errorbar

        # read error file
        error_fc = np.loadtxt(inps.error_file, dtype=bytes).astype(str)
        inps.error_ts = error_fc[:, 1].astype(np.float32) * inps.unit_fac

        # update error file with exclude date
        if inps.ex_date_list:
            e_ts = inps.error_ts[:]
            inps.ex_error_ts = e_ts[inps.ex_flag == 0]
            inps.error_ts = e_ts[inps.ex_flag == 1]

    # Zero displacement for 1st acquisition
    if inps.zero_first:
        inps.zero_idx = min(0, np.min(np.where(inps.ex_flag)[0]))

    # default lookup table file and coordinate object
    if not inps.lookup_file:
        inps.lookup_file = ut.get_lookup_file('./inputs/geometryRadar.h5')
    inps.coord = ut.coordinate(atr, inps.lookup_file)

    ## size and lalo info
    inps.pix_box, inps.geo_box = subset.subset_input_dict2box(vars(inps), atr)
    inps.pix_box = inps.coord.check_box_within_data_coverage(inps.pix_box)
    inps.geo_box = inps.coord.box_pixel2geo(inps.pix_box)
    data_box = (0, 0, int(atr['WIDTH']), int(atr['LENGTH']))
    vprint('data   coverage in y/x: ' + str(data_box))
    vprint('subset coverage in y/x: ' + str(inps.pix_box))
    vprint('data   coverage in lat/lon: ' + str(inps.coord.box_pixel2geo(data_box)))
    vprint('subset coverage in lat/lon: ' + str(inps.geo_box))
    vprint('------------------------------------------------------------------------')

    # calculate multilook_num
    # ONLY IF:
    #   inps.multilook is True (no --nomultilook input) AND
    #   inps.multilook_num == 1 (no --multilook-num input)
    # Note: inps.multilook is used for this check ONLY
    # Note: multilooking is only applied to the 3D data cubes and their related operations:
    # e.g. spatial indexing, referencing, etc. All the other variables are in the original grid
    # so that users get the same result as the non-multilooked version.
    if inps.multilook and inps.multilook_num == 1:
        inps.multilook_num = pp.auto_multilook_num(inps.pix_box, inps.num_date,
                                                   max_memory=inps.maxMemory,
                                                   print_msg=inps.print_msg)

    ## reference pixel
    if not inps.ref_lalo and 'REF_LAT' in atr.keys():
        inps.ref_lalo = (float(atr['REF_LAT']), float(atr['REF_LON']))
    if inps.ref_lalo:
        # set longitude to [-180, 180)
        if inps.coord_unit.lower().startswith('deg') and inps.ref_lalo[1] >= 180.:
            inps.ref_lalo[1] -= 360.
        # ref_lalo --> ref_yx if not set in cmd
        if not inps.ref_yx:
            inps.ref_yx = inps.coord.geo2radar(inps.ref_lalo[0], inps.ref_lalo[1],
                                               print_msg=False)[0:2]

    # use REF_Y/X if ref_yx not set in cmd
    if not inps.ref_yx and 'REF_Y' in atr.keys():
        inps.ref_yx = (int(atr['REF_Y']), int(atr['REF_X']))

    # ref_yx --> ref_lalo if in geo-coord
    # for plotting purpose only
    if inps.ref_yx and 'Y_FIRST' in atr.keys():
        inps.ref_lalo = inps.coord.radar2geo(inps.ref_yx[0], inps.ref_yx[1],
                                             print_msg=False)[0:2]

    # do not plot native reference point if it's out of the coverage due to subset
    if (inps.ref_yx and 'Y_FIRST' in atr.keys()
            and inps.ref_yx == (int(atr.get('REF_Y', -999)), int(atr.get('REF_X', -999)))
            and not (inps.pix_box[0] <= inps.ref_yx[1] < inps.pix_box[2]
                     and inps.pix_box[1] <= inps.ref_yx[0] < inps.pix_box[3])):
        inps.disp_ref_pixel = False
        print('the native REF_Y/X is out of subset box, thus do not display')

    ## initial pixel coord
    if inps.lalo:
        inps.yx = inps.coord.geo2radar(inps.lalo[0], inps.lalo[1], print_msg=False)[0:2]
    try:
        inps.lalo = inps.coord.radar2geo(inps.yx[0], inps.yx[1], print_msg=False)[0:2]
    except:
        inps.lalo = None

    ## figure settings
    # Flip up-down / left-right
    if inps.auto_flip:
        inps.flip_lr, inps.flip_ud = pp.auto_flip_direction(atr, print_msg=inps.print_msg)

    # Transparency - Alpha
    if not inps.transparency:
        # auto adjust transparency value when showing shaded relief DEM
        if inps.dem_file and inps.disp_dem_shade:
            inps.transparency = 0.7
        else:
            inps.transparency = 1.0

    ## display unit and wrap
    # if wrap_step == 2*np.pi (default value), set disp_unit_img = radian;
    # otherwise set disp_unit_img = disp_unit
    inps.disp_unit_img = inps.disp_unit
    if inps.wrap:
        inps.range2phase = -4. * np.pi / float(atr['WAVELENGTH'])
        if 'cm' == inps.disp_unit.split('/')[0]:
            inps.range2phase /= 100.
        elif 'mm' == inps.disp_unit.split('/')[0]:
            inps.range2phase /= 1000.
        elif 'm' == inps.disp_unit.split('/')[0]:
            inps.range2phase /= 1.
        else:
            raise ValueError('un-recognized display unit: {}'.format(inps.disp_unit))

        if (inps.wrap_range[1] - inps.wrap_range[0]) == 2 * np.pi:
            inps.disp_unit_img = 'radian'
        inps.vlim = inps.wrap_range
    inps.cbar_label = 'Displacement [{}]'.format(inps.disp_unit_img)

    ## fit a suite of time func to the time series
    inps.model, inps.num_param = ts2vel.read_inps2model(inps, date_list=inps.date_list)

    # dense TS for plotting
    inps.date_list_fit = ptime.get_date_range(inps.date_list[0], inps.date_list[-1])
    inps.dates_fit = ptime.date_list2vector(inps.date_list_fit)[0]
    inps.G_fit = time_func.get_design_matrix4time_func(
        date_list=inps.date_list_fit,
        model=inps.model,
        seconds=inps.seconds)

    return inps, atr
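
# Standalone sketch of the "dense TS for plotting" step at the end of
# read_init_info() (assumption: the same ptime/time_func helpers as above): a
# daily date list spanning the acquisitions is converted into a design matrix,
# so a fitted parameter vector m can be evaluated as a smooth curve.
def _demo_dense_time_func_fit(date_list, model, m, seconds=0):
    # daily sampling between the first and last acquisition
    date_list_fit = ptime.get_date_range(date_list[0], date_list[-1])
    dates_fit = ptime.date_list2vector(date_list_fit)[0]
    # design matrix on the dense dates, then evaluate the fitted model
    G_fit = time_func.get_design_matrix4time_func(date_list_fit, model=model, seconds=seconds)
    ts_fit = np.matmul(G_fit, m)
    return dates_fit, ts_fit
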
def run_timeseries2time_func(inps):

    # basic info
    atr = readfile.read_attribute(inps.timeseries_file)
    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
    num_date = inps.numDate
    dates = np.array(inps.dateList)
    seconds = atr.get('CENTER_LINE_UTC', 0)

    # use the 1st date as reference if not found, e.g. timeseriesResidual.h5 file
    if "REF_DATE" not in atr.keys() and not inps.ref_date:
        inps.ref_date = inps.dateList[0]
        print('WARNING: No REF_DATE found in time-series file or input in command line.')
        print('  Set "--ref-date {}" and continue.'.format(inps.dateList[0]))

    # get deformation model from parsers
    model, num_param = read_inps2model(inps)

    ## output preparation

    # time_func_param: attributes
    atrV = dict(atr)
    atrV['FILE_TYPE'] = 'velocity'
    atrV['UNIT'] = 'm/year'
    atrV['START_DATE'] = inps.dateList[0]
    atrV['END_DATE'] = inps.dateList[-1]
    atrV['DATE12'] = '{}_{}'.format(inps.dateList[0], inps.dateList[-1])
    if inps.ref_yx:
        atrV['REF_Y'] = inps.ref_yx[0]
        atrV['REF_X'] = inps.ref_yx[1]
    if inps.ref_date:
        atrV['REF_DATE'] = inps.ref_date

    # time_func_param: config parameter
    print('add/update the following configuration metadata:\n{}'.format(configKeys))
    for key in configKeys:
        atrV[key_prefix + key] = str(vars(inps)[key])

    # time_func_param: instantiate output file
    ds_name_dict, ds_unit_dict = model2hdf5_dataset(model, ds_shape=(length, width))[1:]
    writefile.layout_hdf5(inps.outfile,
                          metadata=atrV,
                          ds_name_dict=ds_name_dict,
                          ds_unit_dict=ds_unit_dict)

    # timeseries_res: attributes + instantiate output file
    if inps.save_res:
        atrR = dict(atr)
        # remove REF_DATE attribute
        for key in ['REF_DATE']:
            if key in atrR.keys():
                atrR.pop(key)
        # prepare ds_name_dict manually, instead of using ref_file, to support --ex option
        date_len = len(inps.dateList[0])
        ds_name_dict = {
            "date": [np.dtype(f'S{date_len}'),
                     (num_date,),
                     np.array(inps.dateList, dtype=np.string_)],
            "timeseries": [np.float32, (num_date, length, width), None],
        }
        writefile.layout_hdf5(inps.res_file, ds_name_dict=ds_name_dict, metadata=atrR)

    ## estimation

    # calc the number of boxes based on the memory limit
    memoryAll = (num_date + num_param * 2 + 2) * length * width * 4
    if inps.bootstrap:
        memoryAll += inps.bootstrapCount * num_param * length * width * 4
    num_box = int(np.ceil(memoryAll * 3 / (inps.maxMemory * 1024**3)))
    box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length),
                                           num_split=num_box,
                                           dimension='y',
                                           print_msg=True)

    # loop for block-by-block IO
    for i, box in enumerate(box_list):
        box_wid = box[2] - box[0]
        box_len = box[3] - box[1]
        num_pixel = box_len * box_wid
        if num_box > 1:
            print('\n------- processing patch {} out of {} --------------'.format(i + 1, num_box))
            print('box width:  {}'.format(box_wid))
            print('box length: {}'.format(box_len))

        # initiate output
        m = np.zeros((num_param, num_pixel), dtype=dataType)
        m_std = np.zeros((num_param, num_pixel), dtype=dataType)

        # read input
        print('reading data from file {} ...'.format(inps.timeseries_file))
        ts_data = readfile.read(inps.timeseries_file, box=box)[0]

        # referencing in time and space
        # for file w/o reference info, e.g. ERA5.h5
        if inps.ref_date:
            print('referencing to date: {}'.format(inps.ref_date))
            ref_ind = inps.dateList.index(inps.ref_date)
            ts_data -= np.tile(ts_data[ref_ind, :, :], (ts_data.shape[0], 1, 1))
        if inps.ref_yx:
            print('referencing to point (y, x): ({}, {})'.format(inps.ref_yx[0], inps.ref_yx[1]))
            ref_box = (inps.ref_yx[1], inps.ref_yx[0],
                       inps.ref_yx[1] + 1, inps.ref_yx[0] + 1)
            ref_val = readfile.read(inps.timeseries_file, box=ref_box)[0]
            ts_data -= np.tile(ref_val.reshape(ts_data.shape[0], 1, 1),
                               (1, ts_data.shape[1], ts_data.shape[2]))

        ts_data = ts_data[inps.dropDate, :, :].reshape(inps.numDate, -1)
        if atrV['UNIT'] == 'mm':
            ts_data *= 1. / 1000.

        ts_cov = None
        if inps.ts_cov_file:
            print(f'reading time-series covariance matrix from file {inps.ts_cov_file} ...')
            ts_cov = readfile.read(inps.ts_cov_file, box=box)[0]
            if len(ts_cov.shape) == 4:
                # full covariance matrix in 4D --> 3D
                if inps.numDate < ts_cov.shape[0]:
                    ts_cov = ts_cov[inps.dropDate, :, :, :]
                    ts_cov = ts_cov[:, inps.dropDate, :, :]
                ts_cov = ts_cov.reshape(inps.numDate, inps.numDate, -1)
            elif len(ts_cov.shape) == 3:
                # diagonal variance matrix in 3D --> 2D
                if inps.numDate < ts_cov.shape[0]:
                    ts_cov = ts_cov[inps.dropDate, :, :]
                ts_cov = ts_cov.reshape(inps.numDate, -1)

            ## set zero value to a fixed small value to avoid divide by zero
            #epsilon = 1e-5
            #ts_cov[ts_cov<epsilon] = epsilon

        # mask invalid pixels
        print('skip pixels with zero/nan value in all acquisitions')
        ts_stack = np.nanmean(ts_data, axis=0)
        mask = np.multiply(~np.isnan(ts_stack), ts_stack != 0.)
        del ts_stack

        #if ts_cov is not None:
        #    print('skip pixels with nan STD value in any acquisition')
        #    num_std_nan = np.sum(np.isnan(ts_cov), axis=0)
        #    mask *= num_std_nan == 0
        #    del num_std_nan

        ts_data = ts_data[:, mask]
        num_pixel2inv = int(np.sum(mask))
        idx_pixel2inv = np.where(mask)[0]
        print('number of pixels to invert: {} out of {} ({:.1f}%)'.format(
            num_pixel2inv, num_pixel, num_pixel2inv / num_pixel * 100))

        # go to next if no valid pixel found
        if num_pixel2inv == 0:
            continue

        ### estimation / solve Gm = d
        print('estimating time functions via linalg.lstsq ...')

        if inps.bootstrap:
            ## option 1 - least squares with bootstrapping
            # Bootstrapping is a resampling method which can be used to estimate properties
            # of an estimator. The method relies on independently sampling the data set with
            # replacement.
            print('estimating time function STD with bootstrap resampling ({} times) ...'.format(
                inps.bootstrapCount))

            # calc model of all bootstrap samplings
            rng = np.random.default_rng()
            m_boot = np.zeros((inps.bootstrapCount, num_param, num_pixel2inv), dtype=dataType)
            prog_bar = ptime.progressBar(maxValue=inps.bootstrapCount)
            for i in range(inps.bootstrapCount):
                # bootstrap resampling
                boot_ind = rng.choice(inps.numDate, size=inps.numDate, replace=True)
                boot_ind.sort()

                # estimation
                m_boot[i] = time_func.estimate_time_func(model=model,
                                                         date_list=dates[boot_ind].tolist(),
                                                         dis_ts=ts_data[boot_ind],
                                                         seconds=seconds)[1]

                prog_bar.update(i + 1, suffix='iteration {} / {}'.format(i + 1, inps.bootstrapCount))
            prog_bar.close()
            #del ts_data

            # get mean/std among all bootstrap samplings
            m[:, mask] = m_boot.mean(axis=0).reshape(num_param, -1)
            m_std[:, mask] = m_boot.std(axis=0).reshape(num_param, -1)
            del m_boot

            # get design matrix to calculate the residual time series
            G = time_func.get_design_matrix4time_func(inps.dateList,
                                                      model=model,
                                                      ref_date=inps.ref_date,
                                                      seconds=seconds)

        else:
            ## option 2 - least squares with uncertainty propagation
            G, m[:, mask], e2 = time_func.estimate_time_func(model=model,
                                                             date_list=inps.dateList,
                                                             dis_ts=ts_data,
                                                             seconds=seconds)
            #del ts_data

            ## Compute the covariance matrix for model parameters:
            #       G * m = d
            #       C_m_hat = G+ * C_d * G+.T
            #
            # For ordinary least squares estimation:
            #       G+ = (G.T * G)^-1 * G.T                     (option 2.1)
            #
            # For weighted least squares estimation:
            #       G+ = (G.T * C_d^-1 * G)^-1 * G.T * C_d^-1
            #       => C_m_hat = (G.T * C_d^-1 * G)^-1          (option 2.2)
            #
            # Assuming normality of the observation errors (in the time domain) with a
            # variance of sigma^2, we have C_d = sigma^2 * I, then the above equation is
            # simplified into:
            #       C_m_hat = sigma^2 * (G.T * G)^-1            (option 2.3)
            #
            # Based on the law of integrated expectation, we estimate the obs sigma^2 using
            # the OLS estimation residual as:
            #       e_hat = d - d_hat
            #       => sigma_hat^2 = (e_hat.T * e_hat) / N
            #       => sigma^2 = sigma_hat^2 * N / (N - P)      (option 2.4)
            #                  = (e_hat.T * e_hat) / (N - P)
            # which is the equation (10) from Fattahi and Amelung (2015, JGR)

            if ts_cov is not None:
                # option 2.1 - linear propagation from time-series (co)variance matrix
                # TODO: save the full covariance matrix of the time function parameters;
                # only the STD is saved right now
                covar_flag = True if len(ts_cov.shape) == 3 else False
                msg = 'estimating time function STD from time-series '
                msg += 'covariance pixel-by-pixel ...' if covar_flag else 'variance pixel-by-pixel ...'
                print(msg)

                # calc the common pseudo-inverse matrix
                Gplus = linalg.pinv(G)

                # loop over each pixel
                # or use multidimensional matrix multiplication:
                #     m_cov = Gplus @ ts_cov @ Gplus.T
                prog_bar = ptime.progressBar(maxValue=num_pixel2inv)
                for i in range(num_pixel2inv):
                    idx = idx_pixel2inv[i]

                    # cov: time-series -> time func
                    ts_covi = ts_cov[:, :, idx] if covar_flag else np.diag(ts_cov[:, idx])
                    m_cov = np.linalg.multi_dot([Gplus, ts_covi, Gplus.T])
                    m_std[:, idx] = np.sqrt(np.diag(m_cov))

                    prog_bar.update(i + 1, every=200,
                                    suffix='{}/{} pixels'.format(i + 1, num_pixel2inv))
                prog_bar.close()

            else:
                # option 2.3 - assume obs errors following normal dist. in time
                print('estimating time function STD from time-series fitting residual ...')
                G_inv = linalg.inv(np.dot(G.T, G))
                m_var = e2.reshape(1, -1) / (num_date - num_param)
                m_std[:, mask] = np.sqrt(np.dot(np.diag(G_inv).reshape(-1, 1), m_var))

                # option 2.4 - simplified form for linear velocity (without matrix linear algebra)
                # The STD can also be calculated using Eq. (10) from Fattahi and Amelung (2015, JGR)
                # ts_diff = ts_data - np.dot(G, m)
                # t_diff = G[:, 1] - np.mean(G[:, 1])
                # vel_std = np.sqrt(np.sum(ts_diff ** 2, axis=0) / np.sum(t_diff ** 2) / (num_date - 2))

        # write - time func params
        block = [box[1], box[3], box[0], box[2]]
        ds_dict = model2hdf5_dataset(model, m, m_std, mask=mask)[0]
        for ds_name, data in ds_dict.items():
            writefile.write_hdf5_block(inps.outfile,
                                       data=data.reshape(box_len, box_wid),
                                       datasetName=ds_name,
                                       block=block)

        # write - residual file
        if inps.save_res:
            block = [0, num_date, box[1], box[3], box[0], box[2]]
            ts_res = np.ones((num_date, box_len * box_wid), dtype=np.float32) * np.nan
            ts_res[:, mask] = ts_data - np.dot(G, m)[:, mask]
            writefile.write_hdf5_block(inps.res_file,
                                       data=ts_res.reshape(num_date, box_len, box_wid),
                                       datasetName='timeseries',
                                       block=block)

    return inps.outfile
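
# Standalone sketch of the "option 2.3" uncertainty propagation used above
# (assumption: plain NumPy/SciPy, no MintPy dependency). For G m = d with N
# observations and P parameters, sigma^2 = e_hat.T e_hat / (N - P) and
# C_m_hat = sigma^2 * (G.T G)^-1, i.e. Eq. (10) of Fattahi & Amelung (2015, JGR)
# for the linear-velocity term.
def _demo_param_std(G, d):
    """G - 2D np.ndarray in size of (num_obs, num_param)
    d - 2D np.ndarray in size of (num_obs, num_pixel)
    """
    m_hat = np.linalg.lstsq(G, d, rcond=None)[0]          # OLS estimate
    e_hat = d - np.dot(G, m_hat)                          # fitting residual
    num_obs, num_param = G.shape
    sigma2 = np.sum(e_hat**2, axis=0) / (num_obs - num_param)
    G_inv = linalg.inv(np.dot(G.T, G))
    m_std = np.sqrt(np.outer(np.diag(G_inv), sigma2))     # (num_param, num_pixel)
    return m_hat, m_std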