def subset_file(fname, subset_dict_input, out_file=None): """Subset file with the given subset dictionary. Inputs: fname : str, path/name of file out_file : str, path/name of output file subset_dict : dict, subset parameters, including the following items: subset_x : list of 2 int, subset in x direction, default=None subset_y : list of 2 int, subset in y direction, default=None subset_lat : list of 2 float, subset in lat direction, default=None subset_lon : list of 2 float, subset in lon direction, default=None fill_value : float, optional. fill value for the area outside of data coverage. default=None; if None / not existed, subset within data coverage only. tight : bool, tight subset or not, for lookup table file, i.e. geomap*.trans Outputs: out_file : str, path/name of output file; out_file = 'sub_'+basename(fname), if fname is in the current directory; out_file = basename(fname), if fname is not in the current directory. """ # Input File Info atr = readfile.read_attribute(fname) width = int(atr['WIDTH']) length = int(atr['LENGTH']) k = atr['FILE_TYPE'] print('subset ' + k + ' file: ' + fname + ' ...') subset_dict = subset_dict_input.copy() # Read Subset Inputs into 4-tuple box in pixel and geo coord pix_box, geo_box = subset_input_dict2box(subset_dict, atr) coord = ut.coordinate(atr) # if fill_value exists and not None, subset data and fill assigned value for area out of its coverage. # otherwise, re-check subset to make sure it's within data coverage and initialize the matrix with np.nan outfill = False if 'fill_value' in subset_dict.keys() and subset_dict['fill_value']: outfill = True else: outfill = False if not outfill: pix_box = coord.check_box_within_data_coverage(pix_box) subset_dict['fill_value'] = np.nan geo_box = coord.box_pixel2geo(pix_box) data_box = (0, 0, width, length) print('data range in (x0,y0,x1,y1): {}'.format(data_box)) print('subset range in (x0,y0,x1,y1): {}'.format(pix_box)) print('data range in (W, N, E, S): {}'.format( coord.box_pixel2geo(data_box))) print('subset range in (W, N, E, S): {}'.format(geo_box)) if pix_box == data_box: print('Subset range == data coverage, no need to subset. 
Skip.') return fname # Calculate Subset/Overlap Index pix_box4data, pix_box4subset = get_box_overlap_index(data_box, pix_box) ########################### Data Read and Write ###################### # Output File Name if not out_file: if os.getcwd() == os.path.dirname(os.path.abspath(fname)): if 'tight' in subset_dict.keys() and subset_dict['tight']: out_file = '{}_tight{}'.format( os.path.splitext(fname)[0], os.path.splitext(fname)[1]) else: out_file = 'sub_' + os.path.basename(fname) else: out_file = os.path.basename(fname) print('writing >>> ' + out_file) # update metadata atr = attr.update_attribute4subset(atr, pix_box) # subset datasets one by one dsNames = readfile.get_dataset_list(fname) maxDigit = max([len(i) for i in dsNames]) ext = os.path.splitext(out_file)[1] if ext in ['.h5', '.he5']: # initiate the output file writefile.layout_hdf5(out_file, metadata=atr, ref_file=fname) # subset dataset one-by-one for dsName in dsNames: with h5py.File(fname, 'r') as fi: ds = fi[dsName] ds_shape = ds.shape ds_ndim = ds.ndim print('cropping {d} in {b} from {f} ...'.format( d=dsName, b=pix_box4data, f=os.path.basename(fname))) if ds_ndim == 2: # read data = ds[pix_box4data[1]:pix_box4data[3], pix_box4data[0]:pix_box4data[2]] # crop data_out = np.ones( (pix_box[3] - pix_box[1], pix_box[2] - pix_box[0]), data.dtype) * subset_dict['fill_value'] data_out[pix_box4subset[1]:pix_box4subset[3], pix_box4subset[0]:pix_box4subset[2]] = data data_out = np.array(data_out, dtype=data.dtype) # write block = [0, int(atr['LENGTH']), 0, int(atr['WIDTH'])] writefile.write_hdf5_block(out_file, data=data_out, datasetName=dsName, block=block, print_msg=True) if ds_ndim == 3: prog_bar = ptime.progressBar(maxValue=ds_shape[0]) for i in range(ds_shape[0]): # read data = ds[i, pix_box4data[1]:pix_box4data[3], pix_box4data[0]:pix_box4data[2]] # crop data_out = np.ones( (1, pix_box[3] - pix_box[1], pix_box[2] - pix_box[0]), data.dtype) * subset_dict['fill_value'] data_out[:, pix_box4subset[1]:pix_box4subset[3], pix_box4subset[0]:pix_box4subset[2]] = data # write block = [ i, i + 1, 0, int(atr['LENGTH']), 0, int(atr['WIDTH']) ] writefile.write_hdf5_block(out_file, data=data_out, datasetName=dsName, block=block, print_msg=False) prog_bar.update(i + 1, suffix='{}/{}'.format( i + 1, ds_shape[0])) prog_bar.close() print('finished writing to file: {}'.format(out_file)) else: # IO for binary files dsDict = dict() for dsName in dsNames: dsDict[dsName] = subset_dataset( fname, dsName, pix_box, pix_box4data, pix_box4subset, fill_value=subset_dict['fill_value']) writefile.write(dsDict, out_file=out_file, metadata=atr, ref_file=fname) # write extra metadata files for ISCE data files if os.path.isfile(fname + '.xml') or os.path.isfile(fname + '.aux.xml'): # write ISCE XML file dtype_gdal = readfile.NUMPY2GDAL_DATATYPE[atr['DATA_TYPE']] dtype_isce = readfile.GDAL2ISCE_DATATYPE[dtype_gdal] writefile.write_isce_xml(out_file, width=int(atr['WIDTH']), length=int(atr['LENGTH']), bands=len(dsDict.keys()), data_type=dtype_isce, scheme=atr['scheme'], image_type=atr['FILE_TYPE']) print(f'write file: {out_file}.xml') # write GDAL VRT file if os.path.isfile(fname + '.vrt'): from isceobj.Util.ImageUtil import ImageLib as IML img = IML.loadImage(out_file)[0] img.renderVRT() print(f'write file: {out_file}.vrt') return out_file
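# Note: the cropping above relies on get_box_overlap_index() to relate the requested
# subset box to the data extent. The helper below is an illustrative sketch only (not
# part of the original module) of how such overlap indices can be derived, assuming
# both boxes are (x0, y0, x1, y1) tuples in pixel coordinates of the same file.
def _example_box_overlap(data_box=(0, 0, 100, 80), pix_box=(-10, 20, 50, 120)):
    """Illustrative sketch: overlap between two (x0, y0, x1, y1) boxes.

    Returns the overlapping area expressed 1) in the coordinates of the data box
    (for reading) and 2) in the coordinates of the subset box (for writing).
    """
    x0 = max(data_box[0], pix_box[0])
    y0 = max(data_box[1], pix_box[1])
    x1 = min(data_box[2], pix_box[2])
    y1 = min(data_box[3], pix_box[3])
    box4data = (x0 - data_box[0], y0 - data_box[1],
                x1 - data_box[0], y1 - data_box[1])
    box4subset = (x0 - pix_box[0], y0 - pix_box[1],
                  x1 - pix_box[0], y1 - pix_box[1])
    return box4data, box4subset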
def correct_dem_error(inps): """Correct DEM error of input timeseries file""" start_time = time.time() ## 1. input info # 1.1 read date info ts_obj = timeseries(inps.timeseries_file) ts_obj.open() num_date = ts_obj.numDate length, width = ts_obj.length, ts_obj.width num_step = len(inps.stepFuncDate) # exclude dates date_flag = read_exclude_date(inps.excludeDate, ts_obj.dateList)[0] if inps.polyOrder > np.sum(date_flag): raise ValueError( "input poly order {} > number of acquisition {}! Reduce it!". format(inps.polyOrder, np.sum(date_flag))) # 1.2 design matrix part 1 - time func for surface deformation G_defo = get_design_matrix4defo(inps) ## 2. prepare output # 2.1 metadata meta = dict(ts_obj.metadata) print( 'add/update the following configuration metadata to file:\n{}'.format( configKeys)) for key in configKeys: meta[key_prefix + key] = str(vars(inps)[key]) # 2.2 instantiate est. DEM error dem_err_file = 'demErr.h5' meta['FILE_TYPE'] = 'dem' meta['UNIT'] = 'm' ds_name_dict = {'dem': [np.float32, (length, width), None]} writefile.layout_hdf5(dem_err_file, ds_name_dict, metadata=meta) # 2.3 instantiate corrected time-series ts_cor_file = inps.outfile meta['FILE_TYPE'] = 'timeseries' writefile.layout_hdf5(ts_cor_file, metadata=meta, ref_file=inps.timeseries_file) # 2.4 instantiate residual phase time-series ts_res_file = os.path.join(os.path.dirname(inps.outfile), 'timeseriesResidual.h5') writefile.layout_hdf5(ts_res_file, metadata=meta, ref_file=inps.timeseries_file) # 2.5 instantiate est. step model(s) step_file = None if num_step > 0: step_file = os.path.join(os.path.dirname(inps.outfile), 'timeseriesStepModel.h5') meta.pop('REF_DATE') step_dates = np.array(inps.stepFuncDate, dtype=np.string_) ds_name_dict = { 'date': [step_dates.dtype, (num_step, ), step_dates], 'timeseries': [np.float32, (num_step, length, width), None] } writefile.layout_hdf5(step_file, ds_name_dict, metadata=meta) ## 3. run the estimation and write to disk # 3.1 split ts_file into blocks to save memory box_list, num_box = split2boxes(inps.timeseries_file, geom_file=inps.geom_file, memory_size=inps.memorySize, num_step=num_step) # 3.2 invert / write block-by-block for i, box in enumerate(box_list): box_width = box[2] - box[0] box_length = box[3] - box[1] if num_box > 1: print('\n------- processing patch {} out of {} --------------'. 
format(i + 1, num_box)) print('box width: {}'.format(box_width)) print('box length: {}'.format(box_length)) # invert (delta_z, ts_cor, ts_res, step_model) = correct_dem_error_patch( G_defo, ts_file=inps.timeseries_file, geom_file=inps.geom_file, box=box, date_flag=date_flag, num_step=num_step, phase_velocity=inps.phaseVelocity) # write the block to disk # with 3D block in [z0, z1, y0, y1, x0, x1] # and 2D block in [y0, y1, x0, x1] # DEM error - 2D block = [box[1], box[3], box[0], box[2]] writefile.write_hdf5_block(dem_err_file, data=delta_z, datasetName='dem', block=block) # corrected time-series - 3D block = [0, num_date, box[1], box[3], box[0], box[2]] writefile.write_hdf5_block(ts_cor_file, data=ts_cor, datasetName='timeseries', block=block) # residual time-series - 3D block = [0, num_date, box[1], box[3], box[0], box[2]] writefile.write_hdf5_block(ts_res_file, data=ts_res, datasetName='timeseries', block=block) # step func time-series - 3D if num_step > 0: block = [0, num_step, box[1], box[3], box[0], box[2]] writefile.write_hdf5_block(step_file, data=step_model, datasetName='timeseries', block=block) # time info m, s = divmod(time.time() - start_time, 60) print('time used: {:02.0f} mins {:02.1f} secs.'.format(m, s)) return dem_err_file, ts_cor_file, ts_res_file, step_file
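# The block lists above follow the [y0, y1, x0, x1] (2D) / [z0, z1, y0, y1, x0, x1] (3D)
# convention. The snippet below is a minimal sketch, with plain h5py, of what such a
# block write amounts to; it is an assumption for illustration, not the actual
# writefile.write_hdf5_block() implementation.
def _example_write_hdf5_block(h5_file, dataset_name, data, block):
    """Illustrative sketch: write a data block into an existing HDF5 dataset."""
    import h5py
    with h5py.File(h5_file, 'r+') as f:
        ds = f[dataset_name]
        if len(block) == 4:
            # 2D block: [y0, y1, x0, x1]
            ds[block[0]:block[1], block[2]:block[3]] = data
        else:
            # 3D block: [z0, z1, y0, y1, x0, x1]
            ds[block[0]:block[1], block[2]:block[3], block[4]:block[5]] = data
    return h5_file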
def run_timeseries2time_func(inps): # basic info atr = readfile.read_attribute(inps.timeseries_file) length, width = int(atr['LENGTH']), int(atr['WIDTH']) num_date = inps.numDate dates = np.array(inps.dateList) seconds = atr.get('CENTER_LINE_UTC', 0) # use the 1st date as reference if not found, e.g. timeseriesResidual.h5 file if "REF_DATE" not in atr.keys() and not inps.ref_date: inps.ref_date = inps.dateList[0] print( 'WARNING: No REF_DATE found in time-series file or input in command line.' ) print(' Set "--ref-date {}" and continue.'.format(inps.dateList[0])) # get deformation model from parsers model, num_param = read_inps2model(inps) ## output preparation # time_func_param: attributes atrV = dict(atr) atrV['FILE_TYPE'] = 'velocity' atrV['UNIT'] = 'm/year' atrV['START_DATE'] = inps.dateList[0] atrV['END_DATE'] = inps.dateList[-1] atrV['DATE12'] = '{}_{}'.format(inps.dateList[0], inps.dateList[-1]) if inps.ref_yx: atrV['REF_Y'] = inps.ref_yx[0] atrV['REF_X'] = inps.ref_yx[1] if inps.ref_date: atrV['REF_DATE'] = inps.ref_date # time_func_param: config parameter print('add/update the following configuration metadata:\n{}'.format( configKeys)) for key in configKeys: atrV[key_prefix + key] = str(vars(inps)[key]) # time_func_param: instantiate output file ds_name_dict, ds_unit_dict = model2hdf5_dataset(model, ds_shape=(length, width))[1:] writefile.layout_hdf5(inps.outfile, metadata=atrV, ds_name_dict=ds_name_dict, ds_unit_dict=ds_unit_dict) # timeseries_res: attributes + instantiate output file if inps.save_res: atrR = dict(atr) for key in ['REF_DATE']: if key in atrR.keys(): atrR.pop(key) writefile.layout_hdf5(inps.res_file, metadata=atrR, ref_file=inps.timeseries_file) ## estimation # calc number of box based on memory limit memoryAll = (num_date + num_param * 2 + 2) * length * width * 4 if inps.bootstrap: memoryAll += inps.bootstrapCount * num_param * length * width * 4 num_box = int(np.ceil(memoryAll * 3 / (inps.maxMemory * 1024**3))) box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length), num_split=num_box, dimension='y', print_msg=True) # loop for block-by-block IO for i, box in enumerate(box_list): box_wid = box[2] - box[0] box_len = box[3] - box[1] num_pixel = box_len * box_wid if num_box > 1: print('\n------- processing patch {} out of {} --------------'. format(i + 1, num_box)) print('box width: {}'.format(box_wid)) print('box length: {}'.format(box_len)) # initiate output m = np.zeros((num_param, num_pixel), dtype=dataType) m_std = np.zeros((num_param, num_pixel), dtype=dataType) # read input print('reading data from file {} ...'.format(inps.timeseries_file)) ts_data = readfile.read(inps.timeseries_file, box=box)[0] # referencing in time and space # for file w/o reference info. e.g. ERA5.h5 if inps.ref_date: print('referecing to date: {}'.format(inps.ref_date)) ref_ind = inps.dateList.index(inps.ref_date) ts_data -= np.tile(ts_data[ref_ind, :, :], (ts_data.shape[0], 1, 1)) if inps.ref_yx: print('referencing to point (y, x): ({}, {})'.format( inps.ref_yx[0], inps.ref_yx[1])) ref_box = (inps.ref_yx[1], inps.ref_yx[0], inps.ref_yx[1] + 1, inps.ref_yx[0] + 1) ref_val = readfile.read(inps.timeseries_file, box=ref_box)[0] ts_data -= np.tile(ref_val.reshape(ts_data.shape[0], 1, 1), (1, ts_data.shape[1], ts_data.shape[2])) ts_data = ts_data[inps.dropDate, :, :].reshape(inps.numDate, -1) if atrV['UNIT'] == 'mm': ts_data *= 1. / 1000. 
ts_std = None if inps.ts_std_file: ts_std = readfile.read(inps.ts_std_file, box=box)[0] ts_std = ts_std[inps.dropDate, :, :].reshape(inps.numDate, -1) # set zero value to a fixed small value to avoid divide by zero epsilon = 1e-5 ts_std[ts_std < epsilon] = epsilon # mask invalid pixels print('skip pixels with zero/nan value in all acquisitions') ts_stack = np.nanmean(ts_data, axis=0) mask = np.multiply(~np.isnan(ts_stack), ts_stack != 0.) del ts_stack if ts_std is not None: print('skip pxiels with nan STD value in any acquisition') num_std_nan = np.sum(np.isnan(ts_std), axis=0) mask *= num_std_nan == 0 del num_std_nan ts_data = ts_data[:, mask] num_pixel2inv = int(np.sum(mask)) idx_pixel2inv = np.where(mask)[0] print('number of pixels to invert: {} out of {} ({:.1f}%)'.format( num_pixel2inv, num_pixel, num_pixel2inv / num_pixel * 100)) # go to next if no valid pixel found if num_pixel2inv == 0: continue ### estimation / solve Gm = d print('estimating time functions via linalg.lstsq ...') if inps.bootstrap: ## option 1 - least squares with bootstrapping # Bootstrapping is a resampling method which can be used to estimate properties # of an estimator. The method relies on independently sampling the data set with # replacement. print( 'estimating time function STD with bootstrap resampling ({} times) ...' .format(inps.bootstrapCount)) # calc model of all bootstrap sampling rng = np.random.default_rng() m_boot = np.zeros((inps.bootstrapCount, num_param, num_pixel2inv), dtype=dataType) prog_bar = ptime.progressBar(maxValue=inps.bootstrapCount) for i in range(inps.bootstrapCount): # bootstrap resampling boot_ind = rng.choice(inps.numDate, size=inps.numDate, replace=True) boot_ind.sort() # estimation m_boot[i] = time_func.estimate_time_func( model=model, date_list=dates[boot_ind].tolist(), dis_ts=ts_data[boot_ind], seconds=seconds)[1] prog_bar.update(i + 1, suffix='iteration {} / {}'.format( i + 1, inps.bootstrapCount)) prog_bar.close() #del ts_data # get mean/std among all bootstrap sampling m[:, mask] = m_boot.mean(axis=0).reshape(num_param, -1) m_std[:, mask] = m_boot.std(axis=0).reshape(num_param, -1) del m_boot else: ## option 2 - least squares with uncertainty propagation G, m[:, mask], e2 = time_func.estimate_time_func( model=model, date_list=inps.dateList, dis_ts=ts_data, seconds=seconds) #del ts_data ## Compute the covariance matrix for model parameters: Gm = d # C_m_hat = (G.T * C_d^-1, * G)^-1 # linear propagation from the TS covariance matrix. (option 2.1) # = sigma^2 * (G.T * G)^-1 # assuming obs errors are normally dist. in time. (option 2.2a) # Based on the law of integrated expectation, we estimate the obs sigma^2 using # the OLS estimation residual e_hat_i = d_i - d_hat_i # sigma^2 = sigma_hat^2 * N / (N - P) (option 2.2b) # = (e_hat.T * e_hat) / (N - P) # sigma_hat^2 = (e_hat.T * e_hat) / N if ts_std is not None: # option 2.1 - linear propagation from time-series covariance matrix print( 'estimating time function STD from time-series STD pixel-by-pixel ...' ) prog_bar = ptime.progressBar(maxValue=num_pixel2inv) for i in range(num_pixel2inv): idx = idx_pixel2inv[i] try: C_ts_inv = np.diag(1. / np.square(ts_std[:, idx].flatten())) m_var = np.diag(linalg.inv( G.T.dot(C_ts_inv).dot(G))).astype(np.float32) m_std[:, idx] = np.sqrt(m_var) except linalg.LinAlgError: m_std[:, idx] = np.nan prog_bar.update(i + 1, every=200, suffix='{}/{} pixels'.format( i + 1, num_pixel2inv)) prog_bar.close() else: # option 2.2a - assume obs errors following normal dist. 
in time print( 'estimating time function STD from time-series fitting residual ...' ) G_inv = linalg.inv(np.dot(G.T, G)) m_var = e2.reshape(1, -1) / (num_date - num_param) m_std[:, mask] = np.sqrt( np.dot(np.diag(G_inv).reshape(-1, 1), m_var)) # option 2.2b - simplified form for linear velocity (without matrix linear algebra) # The STD can also be calculated using Eq. (10) from Fattahi and Amelung (2015, JGR) # ts_diff = ts_data - np.dot(G, m) # t_diff = G[:, 1] - np.mean(G[:, 1]) # vel_std = np.sqrt(np.sum(ts_diff ** 2, axis=0) / np.sum(t_diff ** 2) / (num_date - 2)) # write - time func params block = [box[1], box[3], box[0], box[2]] ds_dict = model2hdf5_dataset(model, m, m_std, mask=mask)[0] for ds_name, data in ds_dict.items(): writefile.write_hdf5_block(inps.outfile, data=data.reshape(box_len, box_wid), datasetName=ds_name, block=block) # write - residual file if inps.save_res: block = [0, num_date, box[1], box[3], box[0], box[2]] ts_res = np.ones( (num_date, box_len * box_wid), dtype=np.float32) * np.nan ts_res[:, mask] = ts_data - np.dot(G, m)[:, mask] writefile.write_hdf5_block(inps.res_file, data=ts_res.reshape( num_date, box_len, box_wid), datasetName='timeseries', block=block) return inps.outfile
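# Self-contained numpy illustration of the variance propagation used above (option 2.2a):
# sigma^2 = (e_hat.T * e_hat) / (N - P) and var(m) = sigma^2 * diag((G.T * G)^-1).
# All names and numbers below are synthetic and for illustration only.
def _example_param_std_from_residual():
    """Illustrative sketch: STD of least-squares parameters from the fitting residual."""
    import numpy as np
    rng = np.random.default_rng(0)
    num_obs, num_param = 30, 2
    t = np.linspace(0, 5, num_obs)
    G = np.column_stack([np.ones(num_obs), t])          # offset + linear velocity
    d = 0.01 * t + rng.normal(0, 0.002, num_obs)        # synthetic observations
    m, e2 = np.linalg.lstsq(G, d, rcond=None)[:2]       # e2: sum of squared residuals
    sigma2 = e2[0] / (num_obs - num_param)
    m_std = np.sqrt(sigma2 * np.diag(np.linalg.inv(np.dot(G.T, G))))
    return m, m_std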
def run_geocode(inps): """geocode all input files""" start_time = time.time() # feed the largest file for resample object initiation ind_max = np.argmax([os.path.getsize(i) for i in inps.file]) # prepare geometry for geocoding res_obj = resample(lut_file=inps.lookupFile, src_file=inps.file[ind_max], SNWE=inps.SNWE, lalo_step=inps.laloStep, interp_method=inps.interpMethod, fill_value=inps.fillValue, nprocs=inps.nprocs, max_memory=inps.maxMemory, software=inps.software, print_msg=True) res_obj.open() res_obj.prepare() # resample input files one by one for infile in inps.file: print('-' * 50 + '\nresampling file: {}'.format(infile)) ext = os.path.splitext(infile)[1] atr = readfile.read_attribute(infile, datasetName=inps.dset) outfile = auto_output_filename(infile, inps) # update_mode if inps.updateMode: print('update mode: ON') if ut.run_or_skip(outfile, in_file=[infile, inps.lookupFile]) == 'skip': continue ## prepare output # update metadata if inps.radar2geo: atr = attr.update_attribute4radar2geo(atr, res_obj=res_obj) else: atr = attr.update_attribute4geo2radar(atr, res_obj=res_obj) # instantiate output file file_is_hdf5 = os.path.splitext(infile)[1] in ['.h5', '.he5'] if file_is_hdf5: writefile.layout_hdf5(outfile, metadata=atr, ref_file=infile) else: dsDict = dict() ## run dsNames = readfile.get_dataset_list(infile, datasetName=inps.dset) maxDigit = max([len(i) for i in dsNames]) for dsName in dsNames: if not file_is_hdf5: dsDict[dsName] = np.zeros((res_obj.length, res_obj.width)) # loop for block-by-block IO for i in range(res_obj.num_box): src_box = res_obj.src_box_list[i] dest_box = res_obj.dest_box_list[i] # read print('-' * 50 + '\nreading {d:<{w}} in block {b} from {f} ...'.format( d=dsName, w=maxDigit, b=src_box, f=os.path.basename(infile))) data = readfile.read(infile, datasetName=dsName, box=src_box, print_msg=False)[0] # resample data = res_obj.run_resample(src_data=data, box_ind=i) # write / save block data if data.ndim == 3: block = [ 0, data.shape[0], dest_box[1], dest_box[3], dest_box[0], dest_box[2] ] else: block = [ dest_box[1], dest_box[3], dest_box[0], dest_box[2] ] if file_is_hdf5: print('write data in block {} to file: {}'.format( block, outfile)) writefile.write_hdf5_block(outfile, data=data, datasetName=dsName, block=block, print_msg=False) else: dsDict[dsName][block[0]:block[1], block[2]:block[3]] = data # for binary file: ensure same data type if not file_is_hdf5: dsDict[dsName] = np.array(dsDict[dsName], dtype=data.dtype) # write binary file if not file_is_hdf5: writefile.write(dsDict, out_file=outfile, metadata=atr, ref_file=infile) m, s = divmod(time.time() - start_time, 60) print('time used: {:02.0f} mins {:02.1f} secs.\n'.format(m, s)) return outfile
def ifgram_inversion(inps=None): """Phase triangulatino of small baseline interferograms Parameters: inps - namespace Example: inps = cmd_line_parse() ifgram_inversion(inps) """ if not inps: inps = cmd_line_parse() start_time = time.time() ## 1. input info stack_obj = ifgramStack(inps.ifgramStackFile) stack_obj.open(print_msg=False) date12_list = stack_obj.get_date12_list(dropIfgram=True) date_list = stack_obj.get_date_list(dropIfgram=True) length, width = stack_obj.length, stack_obj.width # 1.1 read values on the reference pixel inps.refPhase = stack_obj.get_reference_phase( unwDatasetName=inps.obsDatasetName, skip_reference=inps.skip_ref, dropIfgram=True) # 1.2 design matrix A = stack_obj.get_design_matrix4timeseries(date12_list)[0] num_ifgram, num_date = A.shape[0], A.shape[1] + 1 inps.numIfgram = num_ifgram # 1.3 print key setup info msg = '-------------------------------------------------------------------------------\n' if inps.minNormVelocity: suffix = 'deformation velocity' else: suffix = 'deformation phase' msg += 'least-squares solution with L2 min-norm on: {}\n'.format(suffix) msg += 'minimum redundancy: {}\n'.format(inps.minRedundancy) msg += 'weight function: {}\n'.format(inps.weightFunc) if inps.maskDataset: if inps.maskDataset in ['coherence', 'offsetSNR']: suffix = '{} < {}'.format(inps.maskDataset, inps.maskThreshold) else: suffix = '{} == 0'.format(inps.maskDataset) msg += 'mask out pixels with: {}\n'.format(suffix) else: msg += 'mask: no\n' if np.linalg.matrix_rank(A) < A.shape[1]: msg += '***WARNING: the network is NOT fully connected.\n' msg += '\tInversion result can be biased!\n' msg += '\tContinue to use SVD to resolve the offset between different subsets.\n' msg += '-------------------------------------------------------------------------------' print(msg) print('number of interferograms: {}'.format(num_ifgram)) print('number of acquisitions : {}'.format(num_date)) print('number of lines : {}'.format(length)) print('number of columns : {}'.format(width)) ## 2. prepare output # 2.1 metadata meta = dict(stack_obj.metadata) for key in configKeys: meta[key_prefix + key] = str(vars(inps)[key]) # 2.2 instantiate time-series dsNameDict = { "date": (np.dtype('S8'), (num_date, )), "bperp": (np.float32, (num_date, )), "timeseries": (np.float32, (num_date, length, width)), } meta['FILE_TYPE'] = 'timeseries' meta['UNIT'] = 'm' meta['REF_DATE'] = date_list[0] ts_obj = timeseries(inps.tsFile) ts_obj.layout_hdf5(dsNameDict, meta) # write date time-series date_list_utf8 = [dt.encode('utf-8') for dt in date_list] writefile.write_hdf5_block(inps.tsFile, date_list_utf8, datasetName='date') # write bperp time-series pbase = stack_obj.get_perp_baseline_timeseries(dropIfgram=True) writefile.write_hdf5_block(inps.tsFile, pbase, datasetName='bperp') # 2.3 instantiate temporal coherence dsNameDict = {"temporalCoherence": (np.float32, (length, width))} meta['FILE_TYPE'] = 'temporalCoherence' meta['UNIT'] = '1' meta.pop('REF_DATE') writefile.layout_hdf5(inps.tempCohFile, dsNameDict, metadata=meta) # 2.4 instantiate number of inverted observations dsNameDict = {"mask": (np.float32, (length, width))} meta['FILE_TYPE'] = 'mask' meta['UNIT'] = '1' writefile.layout_hdf5(inps.numInvFile, dsNameDict, metadata=meta) ## 3. 
run the inversion / estimation and write to disk # 3.1 split ifgram_file into blocks to save memory box_list, num_box = split2boxes(inps.ifgramStackFile, memory_size=inps.memorySize) # 3.2 prepare the input arguments for *_patch() data_kwargs = { "ifgram_file": inps.ifgramStackFile, "ref_phase": inps.refPhase, "obs_ds_name": inps.obsDatasetName, "weight_func": inps.weightFunc, "min_norm_velocity": inps.minNormVelocity, "water_mask_file": inps.waterMaskFile, "mask_ds_name": inps.maskDataset, "mask_threshold": inps.maskThreshold, "min_redundancy": inps.minRedundancy } # 3.3 invert / write block-by-block for i, box in enumerate(box_list): box_width = box[2] - box[0] box_length = box[3] - box[1] if num_box > 1: print('\n------- processing patch {} out of {} --------------'. format(i + 1, num_box)) print('box width: {}'.format(box_width)) print('box length: {}'.format(box_length)) # update box argument in the input data data_kwargs['box'] = box if inps.cluster == 'no': # non-parallel ts, temp_coh, num_inv_ifg = ifgram_inversion_patch( **data_kwargs)[:-1] else: # parallel print('\n\n------- start parallel processing using Dask -------') # initiate the output data ts = np.zeros((num_date, box_length, box_width), np.float32) temp_coh = np.zeros((box_length, box_width), np.float32) num_inv_ifg = np.zeros((box_length, box_width), np.float32) # initiate dask cluster and client cluster_obj = cluster.DaskCluster(inps.cluster, inps.numWorker, config_name=inps.config) cluster_obj.open() # run dask ts, temp_coh, num_inv_ifg = cluster_obj.run( func=ifgram_inversion_patch, func_data=data_kwargs, results=[ts, temp_coh, num_inv_ifg]) # close dask cluster and client cluster_obj.close() print('------- finished parallel processing -------\n\n') # write the block to disk # with 3D block in [z0, z1, y0, y1, x0, x1] # and 2D block in [y0, y1, x0, x1] # time-series - 3D block = [0, num_date, box[1], box[3], box[0], box[2]] writefile.write_hdf5_block(inps.tsFile, data=ts, datasetName='timeseries', block=block) # temporal coherence - 2D block = [box[1], box[3], box[0], box[2]] writefile.write_hdf5_block(inps.tempCohFile, data=temp_coh, datasetName='temporalCoherence', block=block) # number of inverted obs - 2D writefile.write_hdf5_block(inps.numInvFile, data=num_inv_ifg, datasetName='mask', block=block) m, s = divmod(time.time() - start_time, 60) print('time used: {:02.0f} mins {:02.1f} secs.\n'.format(m, s)) # 3.4 update output data on the reference pixel if not inps.skip_ref: # grab ref_y/x ref_y = int(stack_obj.metadata['REF_Y']) ref_x = int(stack_obj.metadata['REF_X']) print('-' * 50) print('update values on the reference pixel: ({}, {})'.format( ref_y, ref_x)) print('set temporal coherence on the reference pixel to 1.') with h5py.File(inps.tempCohFile, 'r+') as f: f['temporalCoherence'][ref_y, ref_x] = 1. print('set # of observations on the reference pixel as {}'.format( num_ifgram)) with h5py.File(inps.numInvFile, 'r+') as f: f['mask'][ref_y, ref_x] = num_ifgram m, s = divmod(time.time() - start_time, 60) print('time used: {:02.0f} mins {:02.1f} secs.\n'.format(m, s)) return
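# For reference, a tiny self-contained sketch of the SBAS-style design matrix A used
# above: one row of -1/+1 per interferogram, one column per acquisition with the
# reference (first) date column dropped, plus the same rank check for network
# connectivity. Dates and pairs below are made up for illustration only.
def _example_design_matrix4timeseries():
    """Illustrative sketch: design matrix A for a small interferogram network."""
    import numpy as np
    date_list = ['20200101', '20200113', '20200125', '20200206']
    date12_list = ['20200101_20200113', '20200113_20200125',
                   '20200101_20200125', '20200125_20200206']
    A = np.zeros((len(date12_list), len(date_list)), dtype=np.float32)
    for i, date12 in enumerate(date12_list):
        date1, date2 = date12.split('_')
        A[i, date_list.index(date1)] = -1
        A[i, date_list.index(date2)] = 1
    A = A[:, 1:]  # drop the reference (first) date column
    fully_connected = np.linalg.matrix_rank(A) == A.shape[1]
    return A, fully_connected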
def correct_dem_error(inps): """Correct DEM error of input timeseries file""" start_time = time.time() # limit the number of threads to 1 # for slight speedup and big CPU usage save num_threads_dict = cluster.set_num_threads("1") ## 1. input info # 1.1 read date info ts_obj = timeseries(inps.timeseries_file) ts_obj.open() num_date = ts_obj.numDate length, width = ts_obj.length, ts_obj.width num_step = len(inps.stepFuncDate) # exclude dates date_flag = read_exclude_date(inps.excludeDate, ts_obj.dateList)[0] if inps.polyOrder > np.sum(date_flag): raise ValueError( "input poly order {} > number of acquisition {}! Reduce it!". format(inps.polyOrder, np.sum(date_flag))) # 1.2 design matrix part 1 - time func for surface deformation G_defo = get_design_matrix4defo(inps) ## 2. prepare output # 2.1 metadata meta = dict(ts_obj.metadata) print( 'add/update the following configuration metadata to file:\n{}'.format( configKeys)) for key in configKeys: meta[key_prefix + key] = str(vars(inps)[key]) # 2.2 instantiate est. DEM error dem_err_file = 'demErr.h5' meta['FILE_TYPE'] = 'dem' meta['UNIT'] = 'm' ds_name_dict = {'dem': [np.float32, (length, width), None]} writefile.layout_hdf5(dem_err_file, ds_name_dict, metadata=meta) # 2.3 instantiate corrected time-series ts_cor_file = inps.outfile meta['FILE_TYPE'] = 'timeseries' writefile.layout_hdf5(ts_cor_file, metadata=meta, ref_file=inps.timeseries_file) # 2.4 instantiate residual phase time-series ts_res_file = os.path.join(os.path.dirname(inps.outfile), 'timeseriesResidual.h5') writefile.layout_hdf5(ts_res_file, metadata=meta, ref_file=inps.timeseries_file) ## 3. run the estimation and write to disk # 3.1 split ts_file into blocks to save memory # 1st dimension size: ts (obs / cor / res / step) + dem_err/inc_angle/rg_dist (+pbase) num_epoch = num_date * 3 + num_step + 3 if inps.geom_file: geom_obj = geometry(inps.geom_file) geom_obj.open(print_msg=False) if 'bperp' in geom_obj.datasetNames: num_epoch += num_date # split in row/line direction based on the input memory limit num_box = int( np.ceil((num_epoch * length * width * 4) * 2.5 / (inps.maxMemory * 1024**3))) box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length), num_split=num_box, dimension='y') # 3.2 prepare the input arguments for *_patch() data_kwargs = { 'G_defo': G_defo, 'ts_file': inps.timeseries_file, 'geom_file': inps.geom_file, 'date_flag': date_flag, 'phase_velocity': inps.phaseVelocity, } # 3.3 invert / write block-by-block for i, box in enumerate(box_list): box_wid = box[2] - box[0] box_len = box[3] - box[1] if num_box > 1: print('\n------- processing patch {} out of {} --------------'. 
format(i + 1, num_box)) print('box width: {}'.format(box_wid)) print('box length: {}'.format(box_len)) # update box argument in the input data data_kwargs['box'] = box # invert if not inps.cluster: # non-parallel delta_z, ts_cor, ts_res = correct_dem_error_patch( **data_kwargs)[:-1] else: # parallel print('\n\n------- start parallel processing using Dask -------') # initiate the output data delta_z = np.zeros((box_len, box_wid), dtype=np.float32) ts_cor = np.zeros((num_date, box_len, box_wid), dtype=np.float32) ts_res = np.zeros((num_date, box_len, box_wid), dtype=np.float32) # initiate dask cluster and client cluster_obj = cluster.DaskCluster(inps.cluster, inps.numWorker, config_name=inps.config) cluster_obj.open() # run dask delta_z, ts_cor, ts_res = cluster_obj.run( func=correct_dem_error_patch, func_data=data_kwargs, results=[delta_z, ts_cor, ts_res]) # close dask cluster and client cluster_obj.close() print('------- finished parallel processing -------\n\n') # write the block to disk # with 3D block in [z0, z1, y0, y1, x0, x1] # and 2D block in [y0, y1, x0, x1] # DEM error - 2D block = [box[1], box[3], box[0], box[2]] writefile.write_hdf5_block(dem_err_file, data=delta_z, datasetName='dem', block=block) # corrected time-series - 3D block = [0, num_date, box[1], box[3], box[0], box[2]] writefile.write_hdf5_block(ts_cor_file, data=ts_cor, datasetName='timeseries', block=block) # residual time-series - 3D block = [0, num_date, box[1], box[3], box[0], box[2]] writefile.write_hdf5_block(ts_res_file, data=ts_res, datasetName='timeseries', block=block) # roll back to the origial number of threads cluster.roll_back_num_threads(num_threads_dict) # time info m, s = divmod(time.time() - start_time, 60) print('time used: {:02.0f} mins {:02.1f} secs.'.format(m, s)) return dem_err_file, ts_cor_file, ts_res_file
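# Worked example (with hypothetical numbers) of the memory-based patch split used
# above: the peak memory is approximated by float32 arrays of num_epoch 2D layers
# with a ~2.5x safety factor, then compared against the memory limit in GB.
def _example_num_box(num_date=100, num_step=0, length=3000, width=2000,
                     max_memory=4.0, has_bperp=False):
    """Illustrative sketch: number of patches from the memory limit (hypothetical sizes)."""
    import numpy as np
    num_epoch = num_date * 3 + num_step + 3   # ts obs/cor/res + dem_err/inc_angle/rg_dist
    if has_bperp:
        num_epoch += num_date
    # e.g. 100 dates on a 3000 x 2000 grid with a 4 GB limit:
    # (303 * 3000 * 2000 * 4 bytes) * 2.5 ~= 16.9 GB --> ceil(16.9 / 4) = 5 patches
    return int(np.ceil((num_epoch * length * width * 4) * 2.5 / (max_memory * 1024**3)))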
def change_timeseries_ref_date(ts_file, ref_date, outfile=None, max_memory=4.0, force=False): """Change input file reference date to a different one. Parameters: ts_file : str, timeseries file to be changed ref_date : str, date in YYYYMMDD format outfile : if str, save to a different file if None, modify the data value in the existing input file """ ts_file = os.path.abspath(ts_file) if not outfile: outfile = ts_file outfile = os.path.abspath(outfile) print('-'*50) print('change reference date for file: {}'.format(ts_file)) atr = readfile.read_attribute(ts_file) dsName = atr['FILE_TYPE'] # if the input reference date is the same as the existing one. if ref_date == atr.get('REF_DATE', None) and not force: print('input refDate is the same as the existing REF_DATE.') if outfile == ts_file: print('Nothing to be done.') return ts_file else: print('Copy {} to {}'.format(ts_file, outfile)) shutil.copy2(ts_file, outfile) return outfile # basic info obj = timeseries(ts_file) obj.open(print_msg=False) num_date = obj.numDate length = obj.length width = obj.width ref_idx = obj.dateList.index(ref_date) # get list of boxes for block-by-block IO num_box = int(np.ceil((num_date * length * width * 4 * 2) / (max_memory * 1024**3))) box_list = split_box2sub_boxes(box=(0, 0, width, length), num_split=num_box, dimension='y', print_msg=True) # updating existing file or write new file if outfile == ts_file: mode = 'r+' else: mode = 'a' # instantiate output file writefile.layout_hdf5(outfile, ref_file=ts_file) # loop for block-by-block IO for i, box in enumerate(box_list): box_width = box[2] - box[0] box_length = box[3] - box[1] if num_box > 1: print('\n------- processing patch {} out of {} --------------'.format(i+1, num_box)) print('box width: {}'.format(box_width)) print('box length: {}'.format(box_length)) # reading print('reading data ...') ts_data = readfile.read(ts_file, box=box)[0] print('referencing in time ...') dshape = ts_data.shape ts_data -= np.tile(ts_data[ref_idx, :, :].reshape(1, dshape[1], dshape[2]), (dshape[0], 1, 1)) # writing block = (0, num_date, box[1], box[3], box[0], box[2]) writefile.write_hdf5_block(outfile, data=ts_data, datasetName=dsName, block=block, mode=mode) # update metadata print('update "REF_DATE" attribute value to {}'.format(ref_date)) with h5py.File(outfile, 'r+') as f: f.attrs['REF_DATE'] = ref_date f.attrs['FILE_PATH'] = outfile return outfile
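# The time-referencing above subtracts the slice at ref_idx from every acquisition.
# A minimal numpy sketch (on a synthetic stack) showing that the same operation can
# be done with broadcasting, without np.tile:
def _example_reference_in_time():
    """Illustrative sketch: subtract the reference epoch from a (time, y, x) stack."""
    import numpy as np
    rng = np.random.default_rng(0)
    ts_data = rng.normal(size=(5, 4, 3)).astype(np.float32)  # (num_date, length, width)
    ref_idx = 2
    ts_data = ts_data - ts_data[ref_idx, :, :]   # broadcast over the time axis
    assert np.allclose(ts_data[ref_idx], 0)
    return ts_data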
def multilook_file(infile, lks_y, lks_x, outfile=None, method='average', margin=[0, 0, 0, 0]): """ Multilook input file Parameters: infile - str, path of input file to be multilooked. lks_y - int, number of looks in y / row direction. lks_x - int, number of looks in x / column direction. margin - list of 4 int, number of pixels to be skipped during multilooking. useful for offset product, where the marginal pixels are ignored during cross correlation matching. outfile - str, path of output file Returns: outfile - str, path of output file """ lks_y = int(lks_y) lks_x = int(lks_x) # input file info atr = readfile.read_attribute(infile) length, width = int(atr['LENGTH']), int(atr['WIDTH']) k = atr['FILE_TYPE'] print('multilooking {} {} file: {}'.format(atr['PROCESSOR'], k, infile)) print('number of looks in y / azimuth direction: %d' % lks_y) print('number of looks in x / range direction: %d' % lks_x) print('multilook method: {}'.format(method)) # margin --> box if margin != [0, 0, 0, 0]: # top, bottom, left, right box = (margin[2], margin[0], width - margin[3], length - margin[1]) print( 'number of pixels to skip in top/bottom/left/right boundaries: {}'. format(margin)) else: box = (0, 0, width, length) # output file name ext = os.path.splitext(infile)[1] if not outfile: if os.getcwd() == os.path.dirname(os.path.abspath(infile)): outfile = os.path.splitext(infile)[0] + '_' + str( lks_y) + 'alks_' + str(lks_x) + 'rlks' + ext else: outfile = os.path.basename(infile) # update metadata atr = multilook_attribute(atr, lks_y, lks_x, box=box) if ext in ['.h5', '.he5']: writefile.layout_hdf5(outfile, metadata=atr, ref_file=infile) # read source data and multilooking dsNames = readfile.get_dataset_list(infile) maxDigit = max([len(i) for i in dsNames]) dsDict = dict() for dsName in dsNames: print('multilooking {d:<{w}} from {f} ...'.format( d=dsName, w=maxDigit, f=os.path.basename(infile))) # split in Y/row direction for IO for HDF5 only if ext in ['.h5', '.he5']: row_step = 200 else: row_step = box[3] - box[1] num_step = int(np.ceil((box[3] - box[1]) / (row_step * lks_y))) for i in range(num_step): r0 = box[1] + row_step * lks_y * i r1 = box[1] + row_step * lks_y * (i + 1) r1 = min(r1, box[3]) # IO box box_i = (box[0], r0, box[2], r1) box_o = (int((box[0] - box[0]) / lks_x), int( (r0 - box[1]) / lks_y), int( (box[2] - box[0]) / lks_x), int((r1 - box[1]) / lks_y)) print('box: {}'.format(box_o)) # read / multilook if method == 'nearest': data = readfile.read(infile, datasetName=dsName, box=box_i, xstep=lks_x, ystep=lks_y, print_msg=False)[0] # fix the size discrepancy between average / nearest method out_len = box_o[3] - box_o[1] out_wid = box_o[2] - box_o[0] if data.ndim == 3: data = data[:, :out_len, :out_wid] else: data = data[:out_len, :out_wid] else: data = readfile.read(infile, datasetName=dsName, box=box_i, print_msg=False)[0] # keep timeseries data as 3D matrix when there is only one acquisition # because readfile.read() will squeeze it to 2D if atr['FILE_TYPE'] == 'timeseries' and len(data.shape) == 2: data = np.reshape(data, (1, data.shape[0], data.shape[1])) data = multilook_data(data, lks_y, lks_x) # output block if data.ndim == 3: block = [ 0, data.shape[0], box_o[1], box_o[3], box_o[0], box_o[2] ] else: block = [box_o[1], box_o[3], box_o[0], box_o[2]] # write if ext in ['.h5', '.he5']: writefile.write_hdf5_block(outfile, data=data, datasetName=dsName, block=block, print_msg=False) else: dsDict[dsName] = data # for binary file with 2 bands, always use BIL scheme if (len(dsDict.keys()) 
== 2 and os.path.splitext(infile)[1] not in ['.h5', '.he5'] and atr.get('scheme', 'BIL').upper() != 'BIL'): print('the input binary file has 2 bands with band interleave as: {}'. format(atr['scheme'])) print( 'for the output binary file, change the band interleave to BIL as default.' ) atr['scheme'] = 'BIL' if ext not in ['.h5', '.he5']: writefile.write(dsDict, out_file=outfile, metadata=atr, ref_file=infile) return outfile
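# multilook_data() does the actual looking. A minimal numpy sketch of average
# multilooking for a 2D array (crop to a multiple of the look numbers, then reshape
# and take the mean); this is an illustration, not the actual implementation:
def _example_multilook_average(data, lks_y, lks_x):
    """Illustrative sketch: average multilooking of a 2D array."""
    import numpy as np
    out_len = data.shape[0] // lks_y
    out_wid = data.shape[1] // lks_x
    data = data[:out_len * lks_y, :out_wid * lks_x]
    return np.nanmean(data.reshape(out_len, lks_y, out_wid, lks_x), axis=(1, 3))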
def run_timeseries2time_func(inps): # basic info atr = readfile.read_attribute(inps.timeseries_file) length, width = int(atr['LENGTH']), int(atr['WIDTH']) num_date = inps.numDate dates = np.array(inps.dateList) seconds = atr.get('CENTER_LINE_UTC', 0) # use the 1st date as reference if not found, e.g. timeseriesResidual.h5 file if "REF_DATE" not in atr.keys() and not inps.ref_date: inps.ref_date = inps.dateList[0] print( 'WARNING: No REF_DATE found in time-series file or input in command line.' ) print(' Set "--ref-date {}" and continue.'.format(inps.dateList[0])) # get deformation model from parsers model, num_param = read_inps2model(inps) ## output preparation # time_func_param: attributes atrV = dict(atr) atrV['FILE_TYPE'] = 'velocity' atrV['UNIT'] = 'm/year' atrV['START_DATE'] = inps.dateList[0] atrV['END_DATE'] = inps.dateList[-1] atrV['DATE12'] = '{}_{}'.format(inps.dateList[0], inps.dateList[-1]) if inps.ref_yx: atrV['REF_Y'] = inps.ref_yx[0] atrV['REF_X'] = inps.ref_yx[1] if inps.ref_date: atrV['REF_DATE'] = inps.ref_date # time_func_param: config parameter print('add/update the following configuration metadata:\n{}'.format( configKeys)) for key in configKeys: atrV[key_prefix + key] = str(vars(inps)[key]) # time_func_param: instantiate output file ds_name_dict, ds_unit_dict = model2hdf5_dataset(model, ds_shape=(length, width))[1:] writefile.layout_hdf5(inps.outfile, metadata=atrV, ds_name_dict=ds_name_dict, ds_unit_dict=ds_unit_dict) # timeseries_res: attributes + instantiate output file if inps.save_res: atrR = dict(atr) # remove REF_DATE attribute for key in ['REF_DATE']: if key in atrR.keys(): atrR.pop(key) # prepare ds_name_dict manually, instead of using ref_file, to support --ex option date_len = len(inps.dateList[0]) ds_name_dict = { "date": [ np.dtype(f'S{date_len}'), (num_date, ), np.array(inps.dateList, dtype=np.string_) ], "timeseries": [np.float32, (num_date, length, width), None] } writefile.layout_hdf5(inps.res_file, ds_name_dict=ds_name_dict, metadata=atrR) ## estimation # calc number of box based on memory limit memoryAll = (num_date + num_param * 2 + 2) * length * width * 4 if inps.bootstrap: memoryAll += inps.bootstrapCount * num_param * length * width * 4 num_box = int(np.ceil(memoryAll * 3 / (inps.maxMemory * 1024**3))) box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length), num_split=num_box, dimension='y', print_msg=True) # loop for block-by-block IO for i, box in enumerate(box_list): box_wid = box[2] - box[0] box_len = box[3] - box[1] num_pixel = box_len * box_wid if num_box > 1: print('\n------- processing patch {} out of {} --------------'. format(i + 1, num_box)) print('box width: {}'.format(box_wid)) print('box length: {}'.format(box_len)) # initiate output m = np.zeros((num_param, num_pixel), dtype=dataType) m_std = np.zeros((num_param, num_pixel), dtype=dataType) # read input print('reading data from file {} ...'.format(inps.timeseries_file)) ts_data = readfile.read(inps.timeseries_file, box=box)[0] # referencing in time and space # for file w/o reference info. e.g. 
ERA5.h5 if inps.ref_date: print('referencing to date: {}'.format(inps.ref_date)) ref_ind = inps.dateList.index(inps.ref_date) ts_data -= np.tile(ts_data[ref_ind, :, :], (ts_data.shape[0], 1, 1)) if inps.ref_yx: print('referencing to point (y, x): ({}, {})'.format( inps.ref_yx[0], inps.ref_yx[1])) ref_box = (inps.ref_yx[1], inps.ref_yx[0], inps.ref_yx[1] + 1, inps.ref_yx[0] + 1) ref_val = readfile.read(inps.timeseries_file, box=ref_box)[0] ts_data -= np.tile(ref_val.reshape(ts_data.shape[0], 1, 1), (1, ts_data.shape[1], ts_data.shape[2])) ts_data = ts_data[inps.dropDate, :, :].reshape(inps.numDate, -1) if atrV['UNIT'] == 'mm': ts_data *= 1. / 1000. ts_cov = None if inps.ts_cov_file: print( f'reading time-series covariance matrix from file {inps.ts_cov_file} ...' ) ts_cov = readfile.read(inps.ts_cov_file, box=box)[0] if len(ts_cov.shape) == 4: # full covariance matrix in 4D --> 3D if inps.numDate < ts_cov.shape[0]: ts_cov = ts_cov[inps.dropDate, :, :, :] ts_cov = ts_cov[:, inps.dropDate, :, :] ts_cov = ts_cov.reshape(inps.numDate, inps.numDate, -1) elif len(ts_cov.shape) == 3: # diagonal variance matrix in 3D --> 2D if inps.numDate < ts_cov.shape[0]: ts_cov = ts_cov[inps.dropDate, :, :] ts_cov = ts_cov.reshape(inps.numDate, -1) ## set zero value to a fixed small value to avoid divide by zero #epsilon = 1e-5 #ts_cov[ts_cov<epsilon] = epsilon # mask invalid pixels print('skip pixels with zero/nan value in all acquisitions') ts_stack = np.nanmean(ts_data, axis=0) mask = np.multiply(~np.isnan(ts_stack), ts_stack != 0.) del ts_stack #if ts_cov is not None: # print('skip pixels with nan STD value in any acquisition') # num_std_nan = np.sum(np.isnan(ts_cov), axis=0) # mask *= num_std_nan == 0 # del num_std_nan ts_data = ts_data[:, mask] num_pixel2inv = int(np.sum(mask)) idx_pixel2inv = np.where(mask)[0] print('number of pixels to invert: {} out of {} ({:.1f}%)'.format( num_pixel2inv, num_pixel, num_pixel2inv / num_pixel * 100)) # go to next if no valid pixel found if num_pixel2inv == 0: continue ### estimation / solve Gm = d print('estimating time functions via linalg.lstsq ...') if inps.bootstrap: ## option 1 - least squares with bootstrapping # Bootstrapping is a resampling method which can be used to estimate properties # of an estimator. The method relies on independently sampling the data set with # replacement. print( 'estimating time function STD with bootstrap resampling ({} times) ...' 
.format(inps.bootstrapCount)) # calc model of all bootstrap sampling rng = np.random.default_rng() m_boot = np.zeros((inps.bootstrapCount, num_param, num_pixel2inv), dtype=dataType) prog_bar = ptime.progressBar(maxValue=inps.bootstrapCount) for i in range(inps.bootstrapCount): # bootstrap resampling boot_ind = rng.choice(inps.numDate, size=inps.numDate, replace=True) boot_ind.sort() # estimation m_boot[i] = time_func.estimate_time_func( model=model, date_list=dates[boot_ind].tolist(), dis_ts=ts_data[boot_ind], seconds=seconds)[1] prog_bar.update(i + 1, suffix='iteration {} / {}'.format( i + 1, inps.bootstrapCount)) prog_bar.close() #del ts_data # get mean/std among all bootstrap sampling m[:, mask] = m_boot.mean(axis=0).reshape(num_param, -1) m_std[:, mask] = m_boot.std(axis=0).reshape(num_param, -1) del m_boot # get design matrix to calculate the residual time series G = time_func.get_design_matrix4time_func(inps.dateList, model=model, ref_date=inps.ref_date, seconds=seconds) else: ## option 2 - least squares with uncertainty propagation G, m[:, mask], e2 = time_func.estimate_time_func( model=model, date_list=inps.dateList, dis_ts=ts_data, seconds=seconds) #del ts_data ## Compute the covariance matrix for model parameters: # G * m = d # C_m_hat = G+ * C_d * G+.T # # For ordinary least squares estimation: # G+ = (G.T * G)^-1 * G.T (option 2.1) # # For weighted least squares estimation: # G+ = (G.T * C_d^-1 * G)^-1 * G.T * C_d^-1 # => C_m_hat = (G.T * C_d^-1 * G)^-1 (option 2.2) # # Assuming normality of the observation errors (in the time domain) with a variance of sigma^2 # we have C_d = sigma^2 * I, then the above equation is simplfied into: # C_m_hat = sigma^2 * (G.T * G)^-1 (option 2.3) # # Based on the law of integrated expectation, we estimate the obs sigma^2 using # the OLS estimation residual as: # e_hat = d - d_hat # => sigma_hat^2 = (e_hat.T * e_hat) / N # => sigma^2 = sigma_hat^2 * N / (N - P) (option 2.4) # = (e_hat.T * e_hat) / (N - P) # which is the equation (10) from Fattahi and Amelung (2015, JGR) if ts_cov is not None: # option 2.1 - linear propagation from time-series (co)variance matrix # TO DO: save the full covariance matrix of the time function parameters # only the STD is saved right now covar_flag = True if len(ts_cov.shape) == 3 else False msg = 'estimating time function STD from time-serries ' msg += 'covariance pixel-by-pixel ...' if covar_flag else 'variance pixel-by-pixel ...' print(msg) # calc the common pseudo-inverse matrix Gplus = linalg.pinv(G) # loop over each pixel # or use multidimension matrix multiplication # m_cov = Gplus @ ts_cov @ Gplus.T prog_bar = ptime.progressBar(maxValue=num_pixel2inv) for i in range(num_pixel2inv): idx = idx_pixel2inv[i] # cov: time-series -> time func ts_covi = ts_cov[:, :, idx] if covar_flag else np.diag( ts_cov[:, idx]) m_cov = np.linalg.multi_dot([Gplus, ts_covi, Gplus.T]) m_std[:, idx] = np.sqrt(np.diag(m_cov)) prog_bar.update(i + 1, every=200, suffix='{}/{} pixels'.format( i + 1, num_pixel2inv)) prog_bar.close() else: # option 2.3 - assume obs errors following normal dist. in time print( 'estimating time function STD from time-series fitting residual ...' ) G_inv = linalg.inv(np.dot(G.T, G)) m_var = e2.reshape(1, -1) / (num_date - num_param) m_std[:, mask] = np.sqrt( np.dot(np.diag(G_inv).reshape(-1, 1), m_var)) # option 2.4 - simplified form for linear velocity (without matrix linear algebra) # The STD can also be calculated using Eq. 
(10) from Fattahi and Amelung (2015, JGR) # ts_diff = ts_data - np.dot(G, m) # t_diff = G[:, 1] - np.mean(G[:, 1]) # vel_std = np.sqrt(np.sum(ts_diff ** 2, axis=0) / np.sum(t_diff ** 2) / (num_date - 2)) # write - time func params block = [box[1], box[3], box[0], box[2]] ds_dict = model2hdf5_dataset(model, m, m_std, mask=mask)[0] for ds_name, data in ds_dict.items(): writefile.write_hdf5_block(inps.outfile, data=data.reshape(box_len, box_wid), datasetName=ds_name, block=block) # write - residual file if inps.save_res: block = [0, num_date, box[1], box[3], box[0], box[2]] ts_res = np.ones( (num_date, box_len * box_wid), dtype=np.float32) * np.nan ts_res[:, mask] = ts_data - np.dot(G, m)[:, mask] writefile.write_hdf5_block(inps.res_file, data=ts_res.reshape( num_date, box_len, box_wid), datasetName='timeseries', block=block) return inps.outfile
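# Self-contained illustration of the bootstrap STD idea used in option 1 above:
# resample the acquisitions with replacement, re-fit the model, and take the spread
# of the estimates. The data below are synthetic and for a single pixel only.
def _example_bootstrap_std(num_boot=200):
    """Illustrative sketch: bootstrap STD of a linear fit (synthetic single pixel)."""
    import numpy as np
    rng = np.random.default_rng(0)
    t = np.linspace(0, 5, 30)
    d = 0.01 * t + rng.normal(0, 0.002, t.size)
    G = np.column_stack([np.ones(t.size), t])
    m_boot = np.zeros((num_boot, 2))
    for i in range(num_boot):
        ind = np.sort(rng.choice(t.size, size=t.size, replace=True))
        m_boot[i] = np.linalg.lstsq(G[ind], d[ind], rcond=None)[0]
    return m_boot.mean(axis=0), m_boot.std(axis=0)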
def run_deramp(fname, ramp_type, mask_file=None, out_file=None, datasetName=None): """ Remove ramp from each 2D matrix of input file Parameters: fname : str, data file to be derampped ramp_type : str, name of ramp to be estimated. mask_file : str, file of mask of pixels used for ramp estimation out_file : str, output file name datasetName : str, output dataset name, for ifgramStack file type only Returns: out_file : str, output file name """ start_time = time.time() atr = readfile.read_attribute(fname) k = atr['FILE_TYPE'] length = int(atr['LENGTH']) width = int(atr['WIDTH']) print('remove {} ramp from file: {}'.format(ramp_type, fname)) if not out_file: fbase, fext = os.path.splitext(fname) out_file = '{}_ramp{}'.format(fbase, fext) if k == 'ifgramStack': out_file = fname # mask if os.path.isfile(mask_file): mask = readfile.read(mask_file)[0] print('read mask file: ' + mask_file) else: mask = np.ones((length, width), dtype=np.bool_) print('use mask of the whole area') # deramping if k == 'timeseries': # write HDF5 file with defined metadata and (empty) dataset structure writefile.layout_hdf5(out_file, ref_file=fname, print_msg=True) print('estimating phase ramp one date at a time ...') date_list = timeseries(fname).get_date_list() num_date = len(date_list) prog_bar = ptime.progressBar(maxValue=num_date) for i in range(num_date): # read data = readfile.read(fname, datasetName=date_list[i])[0] # deramp data = deramp(data, mask, ramp_type=ramp_type, metadata=atr)[0] # write writefile.write_hdf5_block(out_file, data, datasetName='timeseries', block=[i, i + 1, 0, length, 0, width], print_msg=False) prog_bar.update(i + 1, suffix='{}/{}'.format(i + 1, num_date)) prog_bar.close() print('finished writing to file: {}'.format(out_file)) elif k == 'ifgramStack': obj = ifgramStack(fname) obj.open(print_msg=False) if not datasetName: datasetName = 'unwrapPhase' with h5py.File(fname, 'a') as f: ds = f[datasetName] dsNameOut = '{}_ramp'.format(datasetName) if dsNameOut in f.keys(): dsOut = f[dsNameOut] print('access HDF5 dataset /{}'.format(dsNameOut)) else: dsOut = f.create_dataset(dsNameOut, shape=(obj.numIfgram, length, width), dtype=np.float32, chunks=True, compression=None) print('create HDF5 dataset /{}'.format(dsNameOut)) prog_bar = ptime.progressBar(maxValue=obj.numIfgram) for i in range(obj.numIfgram): data = ds[i, :, :] data = deramp(data, mask, ramp_type=ramp_type, metadata=atr)[0] dsOut[i, :, :] = data prog_bar.update(i + 1, suffix='{}/{}'.format(i + 1, obj.numIfgram)) prog_bar.close() print('finished writing to file: {}'.format(fname)) # Single Dataset File else: data = readfile.read(fname)[0] data = deramp(data, mask, ramp_type, metadata=atr)[0] print('writing >>> {}'.format(out_file)) writefile.write(data, out_file=out_file, ref_file=fname) m, s = divmod(time.time() - start_time, 60) print('time used: {:02.0f} mins {:02.1f} secs.'.format(m, s)) return out_file
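# deramp() is imported from elsewhere in the package. The sketch below shows the
# basic idea for a 'linear' ramp only (best-fit plane estimated on masked pixels via
# least squares); it is an assumption for illustration, not the actual implementation.
def _example_deramp_linear(data, mask):
    """Illustrative sketch: remove a best-fit plane (a + b*x + c*y) from a 2D array."""
    import numpy as np
    length, width = data.shape
    yy, xx = np.mgrid[0:length, 0:width]
    G = np.column_stack([np.ones(data.size), xx.ravel(), yy.ravel()])
    flag = mask.ravel() & np.isfinite(data.ravel())
    coeff = np.linalg.lstsq(G[flag], data.ravel()[flag], rcond=None)[0]
    ramp = np.dot(G, coeff).reshape(length, width)
    return data - ramp, ramp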
def calculate_delay_timeseries(tropo_file, dis_file, geom_file, GACOS_dir): """calculate delay time-series and write to HDF5 file""" ## get list of dates atr = readfile.read_attribute(dis_file) ftype = atr['FILE_TYPE'] if ftype == 'timeseries': date_list = timeseries(dis_file).get_date_list() elif ftype == '.unw': date12 = readfile.read_attribute(dis_file)['DATE12'] date_list = ptime.yyyymmdd(date12.split('-')) else: raise ValueError( 'un-supported displacement file type: {}'.format(ftype)) # list of dates --> list of ztd files ztd_files = [ os.path.join(GACOS_dir, '{}.ztd'.format(i)) for i in date_list ] # check missing ztd files flag = np.ones(len(date_list), dtype=np.bool_) for i in range(len(date_list)): if not os.path.isfile(ztd_files[i]): print('WARNING: {} file not found, ignore it and continue'.format( ztd_files[i])) flag[i] = False if np.any(flag == 0): date_list = np.array(date_list)[flag].tolist() ztd_files = np.array(ztd_files)[flag].tolist() ## update_mode def get_dataset_size(fname): atr = readfile.read_attribute(fname) return (atr['LENGTH'], atr['WIDTH']) def run_or_skip(ztd_files, tropo_file, geom_file): print('update mode: ON') print('output file: {}'.format(tropo_file)) flag = 'skip' # check existance and modification time if ut.run_or_skip(out_file=tropo_file, in_file=ztd_files, print_msg=False) == 'run': flag = 'run' print( '1) output file either do NOT exist or is NOT newer than all ZTD files.' ) else: print('1) output file exists and is newer than all ZTD files.') # check dataset size in space / time date_list = [str(re.findall('\d{8}', i)[0]) for i in ztd_files] if (get_dataset_size(tropo_file) != get_dataset_size(geom_file) or any(i not in timeseries(tropo_file).get_date_list() for i in date_list)): flag = 'run' print( '2) output file does NOT have the same len/wid as the geometry file {} or does NOT contain all dates' .format(geom_file)) else: print( '2) output file has the same len/wid as the geometry file and contains all dates' ) # check if output file is fully written with h5py.File(tropo_file, 'r') as f: if np.all(f['timeseries'][-1, :, :] == 0): flag = 'run' print('3) output file is NOT fully written.') else: print('3) output file is fully written.') # result print('run or skip: {}'.format(flag)) return flag if run_or_skip(ztd_files, tropo_file, geom_file) == 'skip': return ## prepare output file # metadata atr['FILE_TYPE'] = 'timeseries' atr['UNIT'] = 'm' # remove metadata related with double reference # because absolute delay is calculated and saved for key in ['REF_DATE', 'REF_X', 'REF_Y', 'REF_LAT', 'REF_LON']: if key in atr.keys(): atr.pop(key) # instantiate time-series length, width = int(atr['LENGTH']), int(atr['WIDTH']) num_date = len(date_list) dates = np.array(date_list, dtype=np.string_) ds_name_dict = { "date": [dates.dtype, (num_date, ), dates], "timeseries": [np.float32, (num_date, length, width), None], } writefile.layout_hdf5(tropo_file, ds_name_dict, metadata=atr) ## calculate phase delay # read geometry print('read incidenceAngle from file: {}'.format(geom_file)) inc_angle = readfile.read(geom_file, datasetName='incidenceAngle')[0] cos_inc_angle = np.cos(inc_angle * np.pi / 180.0) if 'Y_FIRST' in atr.keys(): pts_new = None else: # pixel coordinates in geometry file print('get pixel coordinates in geometry file') lats, lons = ut.get_lat_lon(atr, geom_file) pts_new = np.hstack((lats.reshape(-1, 1), lons.reshape(-1, 1))) # loop for date-by-date IO prog_bar = ptime.progressBar(maxValue=num_date) for i in range(num_date): date_str = date_list[i] 
ztd_file = ztd_files[i] # calc delay if 'Y_FIRST' in atr.keys(): delay = get_delay_geo(ztd_file, atr, cos_inc_angle) else: delay = get_delay_radar(ztd_file, cos_inc_angle, pts_new) # write delay to file block = [i, i + 1, 0, length, 0, width] writefile.write_hdf5_block(tropo_file, data=delay, datasetName='timeseries', block=block, print_msg=False) prog_bar.update(i + 1, suffix=os.path.basename(ztd_file)) prog_bar.close() return tropo_file
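# get_delay_geo() / get_delay_radar() map the GACOS zenith total delay (ZTD) onto the
# radar line of sight. The one-liner below is a heavily simplified sketch of that
# projection, assuming a plain 1/cos(incidence) mapping and ignoring the interpolation
# from the ZTD grid to the SAR grid as well as any sign/reference conventions.
def _example_ztd2slant(ztd, inc_angle_deg):
    """Illustrative sketch: project zenith delay (m) to slant-range delay (m)."""
    import numpy as np
    return ztd / np.cos(np.deg2rad(inc_angle_deg))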
def calc_delay_timeseries(inps): """Calculate delay time-series and write it to HDF5 file. Parameters: inps : namespace, all input parameters Returns: tropo_file : str, file name of ECMWF.h5 """ def get_dataset_size(fname): atr = readfile.read_attribute(fname) shape = (int(atr['LENGTH']), int(atr['WIDTH'])) return shape def run_or_skip(grib_files, tropo_file, geom_file): print('update mode: ON') print('output file: {}'.format(tropo_file)) flag = 'skip' # check existance and modification time if ut.run_or_skip(out_file=tropo_file, in_file=grib_files, print_msg=False) == 'run': flag = 'run' print( '1) output file either do NOT exist or is NOT newer than all GRIB files.' ) else: print('1) output file exists and is newer than all GRIB files.') # check dataset size in space / time date_list = [ str(re.findall('\d{8}', os.path.basename(i))[0]) for i in grib_files ] if (get_dataset_size(tropo_file) != get_dataset_size(geom_file) or any(i not in timeseries(tropo_file).get_date_list() for i in date_list)): flag = 'run' print( '2) output file does NOT have the same len/wid as the geometry file {} or does NOT contain all dates' .format(geom_file)) else: print( '2) output file has the same len/wid as the geometry file and contains all dates' ) # check if output file is fully written with h5py.File(tropo_file, 'r') as f: if np.all(f['timeseries'][-1, :, :] == 0): flag = 'run' print('3) output file is NOT fully written.') else: print('3) output file is fully written.') # result print('run or skip: {}'.format(flag)) return flag if run_or_skip(inps.grib_files, inps.tropo_file, inps.geom_file) == 'skip': return ## 1. prepare geometry data geom_obj = geometry(inps.geom_file) geom_obj.open() inps.inc = geom_obj.read(datasetName='incidenceAngle') inps.dem = geom_obj.read(datasetName='height') # for testing if inps.custom_height: print( 'use input custom height of {} m for vertical integration'.format( inps.custom_height)) inps.dem[:] = inps.custom_height if 'latitude' in geom_obj.datasetNames: # for lookup table in radar-coord (isce, doris) inps.lat = geom_obj.read(datasetName='latitude') inps.lon = geom_obj.read(datasetName='longitude') elif 'Y_FIRST' in geom_obj.metadata: # for lookup table in geo-coded (gamma, roipac) and obs. in geo-coord inps.lat, inps.lon = ut.get_lat_lon(geom_obj.metadata) # convert coordinates to lat/lon, e.g. from UTM for ASF HyPP3 if not geom_obj.metadata['Y_UNIT'].startswith('deg'): inps.lat, inps.lon = ut.to_latlon(inps.atr['OG_FILE_PATH'], inps.lon, inps.lat) else: # for lookup table in geo-coded (gamma, roipac) and obs. in radar-coord inps.lat, inps.lon = ut.get_lat_lon_rdc(inps.atr) # mask of valid pixels mask = np.multiply(inps.inc != 0, ~np.isnan(inps.inc)) ## 2. prepare output file # metadata atr = inps.atr.copy() atr['FILE_TYPE'] = 'timeseries' atr['UNIT'] = 'm' # remove metadata related with double reference # because absolute delay is calculated and saved for key in ['REF_DATE', 'REF_X', 'REF_Y', 'REF_LAT', 'REF_LON']: if key in atr.keys(): atr.pop(key) # instantiate time-series length, width = int(atr['LENGTH']), int(atr['WIDTH']) num_date = len(inps.grib_files) date_list = [ str(re.findall('\d{8}', os.path.basename(i))[0]) for i in inps.grib_files ] dates = np.array(date_list, dtype=np.string_) ds_name_dict = { "date": [dates.dtype, (num_date, ), dates], "timeseries": [np.float32, (num_date, length, width), None], } writefile.layout_hdf5(inps.tropo_file, ds_name_dict, metadata=atr) ## 3. 
print('\n------------------------------------------------------------------------------') print('calculating absolute delay for each date using PyAPS (Jolivet et al., 2011; 2014) ...') print('number of grib files used: {}'.format(num_date)) prog_bar = ptime.progressBar(maxValue=num_date, print_msg=not inps.verbose) for i in range(num_date): grib_file = inps.grib_files[i] # calc tropo delay tropo_data = get_delay(grib_file, tropo_model=inps.tropo_model, delay_type=inps.delay_type, dem=inps.dem, inc=inps.inc, lat=inps.lat, lon=inps.lon, mask=mask, verbose=inps.verbose) # write tropo delay to file block = [i, i + 1, 0, length, 0, width] writefile.write_hdf5_block(inps.tropo_file, data=tropo_data, datasetName='timeseries', block=block, print_msg=False) prog_bar.update(i + 1, suffix=os.path.basename(grib_file)) prog_bar.close() return inps.tropo_file
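# Editorial usage sketch (assumptions: the attribute names below are those read by
# calc_delay_timeseries() above; the file paths and model/delay-type values are
# placeholders, not prescribed by the original code):
#
#   from argparse import Namespace
#   inps = Namespace(
#       grib_files=['./GRIB/ERA5_20200101.grb', './GRIB/ERA5_20200113.grb'],
#       tropo_file='./ERA5.h5',
#       geom_file='./inputs/geometryRadar.h5',
#       atr=readfile.read_attribute('./timeseries.h5'),
#       tropo_model='ERA5',
#       delay_type='comb',
#       custom_height=None,
#       verbose=False,
#   )
#   calc_delay_timeseries(inps)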
def diff_file(file1, file2, out_file=None, force=False, max_num_pixel=2e8): """calculate/write file1 - file2 Parameters: file1 - str, path of file1 file2 - list of str, path of file2(s) out_file - str, path of output file force - bool, overwrite existing output file max_num_pixel - float, maximum number of pixels for each block """ start_time = time.time() if not out_file: fbase, fext = os.path.splitext(file1) if len(file2) > 1: raise ValueError('Output file name is needed for more than 2 input files.') out_file = '{}_diff_{}{}'.format(fbase, os.path.splitext(os.path.basename(file2[0]))[0], fext) print('{} - {} --> {}'.format(file1, file2, out_file)) # Read basic info atr1 = readfile.read_attribute(file1) k1 = atr1['FILE_TYPE'] atr2 = readfile.read_attribute(file2[0]) k2 = atr2['FILE_TYPE'] print('input files are: {} and {}'.format(k1, k2)) if k1 == 'timeseries': if k2 not in ['timeseries', 'giantTimeseries']: raise Exception('Input multi-dataset files are not of the same file type!') if len(file2) > 1: raise Exception(('Only the subtraction of two files is supported for time-series files,' ' got {} inputs.'.format(len(file2) + 1))) atr1 = readfile.read_attribute(file1) atr2 = readfile.read_attribute(file2[0]) dateList1 = timeseries(file1).get_date_list() if k2 == 'timeseries': dateList2 = timeseries(file2[0]).get_date_list() unit_fac = 1. elif k2 == 'giantTimeseries': dateList2 = giantTimeseries(file2[0]).get_date_list() unit_fac = 0.001 # check reference point ref_date, ref_y, ref_x = check_reference(atr1, atr2) # check dates shared by two timeseries files dateListShared = [i for i in dateList1 if i in dateList2] dateShared = np.ones((len(dateList1)), dtype=np.bool_) if dateListShared != dateList1: print('WARNING: {} does not contain all dates in {}'.format(file2, file1)) if force: dateListEx = list(set(dateList1) - set(dateListShared)) print('Continue and enforce the differencing for their shared dates only.') print('\tthe following dates are ignored for differencing:\n{}'.format(dateListEx)) dateShared[np.array([dateList1.index(i) for i in dateListEx])] = 0 else: raise Exception('To enforce the differencing anyway, use the --force option.') # instantiate the output file writefile.layout_hdf5(out_file, ref_file=file1) # block-by-block IO length, width = int(atr1['LENGTH']), int(atr1['WIDTH']) num_box = int(np.ceil(len(dateList1) * length * width / max_num_pixel)) box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length), num_split=num_box, dimension='y', print_msg=True) if ref_y and ref_x: ref_box = (ref_x, ref_y, ref_x + 1, ref_y + 1) ref_val = readfile.read(file2[0], datasetName=dateListShared, box=ref_box)[0] * unit_fac for i, box in enumerate(box_list): if num_box > 1: print('\n------- processing patch {} out of {} --------------'.format(i + 1, num_box)) print('box: {}'.format(box)) # read data2 (consider different reference_date/pixel) print('read from file: {}'.format(file2[0])) data2 = readfile.read(file2[0], datasetName=dateListShared, box=box)[0] * unit_fac if ref_y and ref_x: print('* referencing data from {} to y/x: {}/{}'.format(os.path.basename(file2[0]), ref_y, ref_x)) data2 -= np.tile(ref_val.reshape(-1, 1, 1), (1, data2.shape[1], data2.shape[2])) if ref_date: print('* referencing data from {} to date: {}'.format(os.path.basename(file2[0]), ref_date)) ref_ind = dateListShared.index(ref_date) data2 -= np.tile(data2[ref_ind, :, :], (data2.shape[0], 1, 1)) # read data1 print('read from file: {}'.format(file1)) data = readfile.read(file1, box=box)[0] # apply differencing mask = data == 0. data[dateShared] -= data2 data[mask] = 0. # Do not change zero phase value del data2 # write the block block = [0, data.shape[0], box[1], box[3], box[0], box[2]] writefile.write_hdf5_block(out_file, data=data, datasetName=k1, block=block) elif all(i == 'ifgramStack' for i in [k1, k2]): obj1 = ifgramStack(file1) obj1.open() obj2 = ifgramStack(file2[0]) obj2.open() dsNames = list(set(obj1.datasetNames) & set(obj2.datasetNames)) if len(dsNames) == 0: raise ValueError('no common dataset between two files!') dsName = [i for i in ifgramDatasetNames if i in dsNames][0] # read data print('reading {} from file {} ...'.format(dsName, file1)) data1 = readfile.read(file1, datasetName=dsName)[0] print('reading {} from file {} ...'.format(dsName, file2[0])) data2 = readfile.read(file2[0], datasetName=dsName)[0] # consider reference pixel if 'unwrapphase' in dsName.lower(): print('referencing to pixel ({},{}) ...'.format(obj1.refY, obj1.refX)) ref1 = data1[:, obj1.refY, obj1.refX] ref2 = data2[:, obj2.refY, obj2.refX] for i in range(data1.shape[0]): data1[i, :][data1[i, :] != 0.] -= ref1[i] data2[i, :][data2[i, :] != 0.] -= ref2[i] # operation and ignore zero values data1[data1 == 0] = np.nan data2[data2 == 0] = np.nan data = data1 - data2 del data1, data2 data[np.isnan(data)] = 0. # write to file dsDict = {} dsDict[dsName] = data writefile.write(dsDict, out_file=out_file, ref_file=file1) # Single dataset file else: data1 = readfile.read(file1)[0] data = np.array(data1, data1.dtype) for fname in file2: data2 = readfile.read(fname)[0] data = np.array(data, dtype=np.float32) - np.array(data2, dtype=np.float32) data = np.array(data, data1.dtype) print('writing >>> ' + out_file) writefile.write(data, out_file=out_file, metadata=atr1) m, s = divmod(time.time() - start_time, 60) print('time used: {:02.0f} mins {:02.1f} secs'.format(m, s)) return out_file
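# Editorial illustration (numpy-only, not part of diff_file): the 'timeseries' branch
# above re-references data2 to the reference pixel and reference date before
# subtracting, so both files share the same datum. With toy shapes, the same two
# steps look like this:
def _example_double_reference():
    import numpy as np
    num_date, length, width = 4, 3, 3
    rng = np.random.default_rng(0)
    data2 = rng.random((num_date, length, width)).astype(np.float32)
    ref_y, ref_x, ref_ind = 1, 1, 0
    # spatial referencing: subtract the reference-pixel value of each date
    data2 -= data2[:, ref_y, ref_x].reshape(-1, 1, 1)
    # temporal referencing: subtract the map of the reference date from all dates
    data2 -= data2[ref_ind, :, :].copy()
    return data2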
def main(iargs=None): inps = cmd_line_parse(iargs) key = 'geolocation_corrected' with h5py.File(inps.geometry_file, 'r') as f: keys = f.attrs.keys() latitude = f['latitude'][:, :] longitude = f['longitude'][:, :] atr = readfile.read(inps.geometry_file, datasetName='azimuthAngle')[1] if key not in keys or atr[key] == 'no': status = 'run' print('Run geolocation correction ...') else: status = 'skip' print('Geolocation correction is already done; you may reverse it using --reverse. skip ...') if inps.reverse: if key in keys and atr[key] == 'yes': status = 'run' print('Reversing geolocation correction ...') else: status = 'skip' print('The file is not corrected for geolocation. skip ...') if status == 'run': az_angle = np.deg2rad(float(atr['HEADING'])) inc_angle = np.deg2rad(readfile.read(inps.geometry_file, datasetName='incidenceAngle')[0]) dem_error = readfile.read(inps.dem_error_file, datasetName='dem')[0] rad_latitude = np.deg2rad(latitude) one_degree_latitude = 111132.92 - 559.82 * np.cos(2*rad_latitude) + \ 1.175 * np.cos(4 * rad_latitude) - 0.0023 * np.cos(6 * rad_latitude) one_degree_longitude = 111412.84 * np.cos(rad_latitude) - \ 93.5 * np.cos(3 * rad_latitude) + 0.118 * np.cos(5 * rad_latitude) dx = np.divide(dem_error * (1 / np.tan(inc_angle)) * np.cos(az_angle), one_degree_longitude) # converted to degrees dy = np.divide(dem_error * (1 / np.tan(inc_angle)) * np.sin(az_angle), one_degree_latitude) # converted to degrees if inps.reverse: sign = np.sign(latitude) latitude -= sign * dy sign = np.sign(longitude) if atr['ORBIT_DIRECTION'] == 'Ascending': longitude += sign * dx else: longitude -= sign * dx atr[key] = 'no' block = [0, latitude.shape[0], 0, latitude.shape[1]] writefile.write_hdf5_block(inps.geometry_file, data=latitude, datasetName='latitude', block=block) writefile.write_hdf5_block(inps.geometry_file, data=longitude, datasetName='longitude', block=block) ut.add_attribute(inps.geometry_file, atr_new=atr) else: sign = np.sign(latitude) latitude += sign * dy sign = np.sign(longitude) if atr['ORBIT_DIRECTION'] == 'Ascending': longitude -= sign * dx else: longitude += sign * dx atr[key] = 'yes' block = [0, latitude.shape[0], 0, latitude.shape[1]] writefile.write_hdf5_block(inps.geometry_file, data=latitude, datasetName='latitude', block=block) writefile.write_hdf5_block(inps.geometry_file, data=longitude, datasetName='longitude', block=block) ut.add_attribute(inps.geometry_file, atr_new=atr) return
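# Editorial worked example (standalone, all input values are placeholders): the
# correction above converts a DEM error into an east/north shift via cot(incidence),
# the heading angle, and the meters-per-degree of latitude/longitude at that latitude.
def _example_geolocation_shift(dem_error=10.0, inc_deg=34.0, heading_deg=-167.0, lat_deg=33.0):
    import numpy as np
    inc = np.deg2rad(inc_deg)
    az = np.deg2rad(heading_deg)
    rad_lat = np.deg2rad(lat_deg)
    # length of one degree of latitude / longitude in meters, same series as above
    m_per_deg_lat = (111132.92 - 559.82 * np.cos(2 * rad_lat)
                     + 1.175 * np.cos(4 * rad_lat) - 0.0023 * np.cos(6 * rad_lat))
    m_per_deg_lon = (111412.84 * np.cos(rad_lat)
                     - 93.5 * np.cos(3 * rad_lat) + 0.118 * np.cos(5 * rad_lat))
    # horizontal displacement of the pixel caused by the DEM error, in degrees
    dx = dem_error / np.tan(inc) * np.cos(az) / m_per_deg_lon   # east-west
    dy = dem_error / np.tan(inc) * np.sin(az) / m_per_deg_lat   # north-south
    return dx, dy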
def main(iargs=None): """ Overwrite filtered SLC images in Isce merged/SLC directory. """ Parser = MinoPyParser(iargs, script='generate_temporal_coherence') inps = Parser.parse() dateStr = datetime.datetime.strftime(datetime.datetime.now(), '%Y%m%d:%H%M%S') if not iargs is None: msg = os.path.basename(__file__) + ' ' + ' '.join(iargs[:]) string = dateStr + " * " + msg print(string) else: msg = os.path.basename(__file__) + ' ' + ' '.join(sys.argv[1::]) string = dateStr + " * " + msg print(string) start_time = time.time() os.chdir(inps.work_dir) minopy_dir = os.path.dirname(inps.work_dir) minopy_template_file = os.path.join(minopy_dir, 'minopyApp.cfg') inps.ifgramStackFile = os.path.join(inps.work_dir, 'inputs/ifgramStack.h5') template = readfile.read_template(minopy_template_file) if template['minopy.timeseries.tempCohType'] == 'auto': template['minopy.timeseries.tempCohType'] = 'full' atr = {} atr['minopy.timeseries.tempCohType'] = template[ 'minopy.timeseries.tempCohType'] ut.add_attribute(inps.ifgramStackFile, atr) # check if input observation dataset exists. stack_obj = ifgramStack(inps.ifgramStackFile) stack_obj.open(print_msg=False) metadata = stack_obj.get_metadata() length, width = stack_obj.length, stack_obj.width inps.invQualityFile = 'temporalCoherence.h5' mintpy_mask_file = os.path.join(inps.work_dir, 'maskTempCoh.h5') quality_name = os.path.join( minopy_dir, 'inverted/tempCoh_{}'.format( template['minopy.timeseries.tempCohType'])) quality = np.memmap(quality_name, mode='r', dtype='float32', shape=(length, width)) # inps.waterMaskFile = os.path.join(minopy_dir, 'waterMask.h5') inps.waterMaskFile = None water_mask = np.ones(quality.shape, dtype=np.int8) if template['minopy.timeseries.waterMask'] != 'auto': inps.waterMaskFile = template['minopy.timeseries.waterMask'] if os.path.exists(inps.waterMaskFile): with h5py.File(inps.waterMaskFile, 'r') as f2: if 'waterMask' in f2: water_mask = f2['waterMask'][:, :] else: water_mask = f2['mask'][:, :] if inps.shadow_mask: if os.path.exists(os.path.join(minopy_dir, 'shadow_mask.h5')): with h5py.File(os.path.join(minopy_dir, 'shadow_mask.h5'), 'r') as f2: shadow_mask = f2['mask'][:, :] water_mask = water_mask * shadow_mask inv_quality = np.zeros((quality.shape[0], quality.shape[1])) inv_quality_name = 'temporalCoherence' inv_quality[:, :] = quality[:, :] inv_quality[inv_quality <= 0] = np.nan inv_quality[water_mask < 0.5] = np.nan if os.path.exists(mintpy_mask_file): mintpy_mask = readfile.read(mintpy_mask_file, datasetName='mask')[0] inv_quality[mintpy_mask == 0] = np.nan if not os.path.exists(inps.invQualityFile): metadata['UNIT'] = '1' metadata['FILE_TYPE'] = inv_quality_name if 'REF_DATE' in metadata: metadata.pop('REF_DATE') ds_name_dict = {metadata['FILE_TYPE']: [np.float32, (length, width)]} writefile.layout_hdf5(inps.invQualityFile, ds_name_dict, metadata=metadata) # write the block to disk # with 3D block in [z0, z1, y0, y1, x0, x1] # and 2D block in [y0, y1, x0, x1] block = [0, length, 0, width] writefile.write_hdf5_block(inps.invQualityFile, data=inv_quality, datasetName=inv_quality_name, block=block) get_phase_linking_coherence_mask(metadata, inps.work_dir) m, s = divmod(time.time() - start_time, 60) print('time used: {:02.0f} mins {:02.1f} secs.\n'.format(m, s)) return
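# Editorial illustration (numpy-only, toy-sized): the masking steps above combine the
# water and shadow masks, then blank out pixels with non-positive temporal coherence
# before writing temporalCoherence.h5. A standalone equivalent:
def _example_mask_quality():
    import numpy as np
    length, width = 4, 5
    rng = np.random.default_rng(0)
    quality = rng.random((length, width)).astype(np.float32)
    water_mask = np.ones((length, width), dtype=np.int8)
    shadow_mask = np.ones((length, width), dtype=np.int8)
    water_mask[:, 0] = 0      # e.g. a water column
    shadow_mask[0, :] = 0     # e.g. a shadowed row
    water_mask = water_mask * shadow_mask
    inv_quality = quality.copy()
    inv_quality[inv_quality <= 0] = np.nan
    inv_quality[water_mask < 0.5] = np.nan
    return inv_quality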
def multilook_file(infile, lks_y, lks_x, outfile=None, method='average', margin=[0, 0, 0, 0], max_memory=4): """ Multilook input file Parameters: infile - str, path of input file to be multilooked. lks_y - int, number of looks in y / row direction. lks_x - int, number of looks in x / column direction. method - str, multilook method: average or nearest. margin - list of 4 int, number of pixels to be skipped during multilooking. useful for offset product, where the marginal pixels are ignored during cross correlation matching. max_memory - float, maximum memory to use in GB. outfile - str, path of output file Returns: outfile - str, path of output file """ lks_y = int(lks_y) lks_x = int(lks_x) # input file info atr = readfile.read_attribute(infile) length, width = int(atr['LENGTH']), int(atr['WIDTH']) k = atr['FILE_TYPE'] print('multilooking {} {} file: {}'.format(atr['PROCESSOR'], k, infile)) print('number of looks in y / azimuth direction: %d' % lks_y) print('number of looks in x / range direction: %d' % lks_x) print('multilook method: {}'.format(method)) # margin --> box if margin != [0, 0, 0, 0]: # top, bottom, left, right box = (margin[2], margin[0], width - margin[3], length - margin[1]) print('number of pixels to skip in top/bottom/left/right boundaries: {}'.format(margin)) else: box = (0, 0, width, length) # output file name ext = os.path.splitext(infile)[1] if not outfile: if os.getcwd() == os.path.dirname(os.path.abspath(infile)): outfile = os.path.splitext(infile)[0] + '_' + str(lks_y) + 'alks_' + str(lks_x) + 'rlks' + ext else: outfile = os.path.basename(infile) # update metadata atr = attr.update_attribute4multilook(atr, lks_y, lks_x, box=box) if ext in ['.h5', '.he5']: writefile.layout_hdf5(outfile, metadata=atr, ref_file=infile) # read source data and multilooking dsNames = readfile.get_dataset_list(infile) maxDigit = max([len(i) for i in dsNames]) dsDict = dict() for dsName in dsNames: print('multilooking {d:<{w}} from {f} ...'.format(d=dsName, w=maxDigit, f=os.path.basename(infile))) # split in Y/row direction for IO for HDF5 only if ext in ['.h5', '.he5']: # calc step size with memory usage up to max_memory GB with h5py.File(infile, 'r') as f: ds = f[dsName] ds_size = np.prod(ds.shape) * 4 num_step = int(np.ceil(ds_size * 4 / (max_memory * 1024**3))) row_step = int(np.rint(length / num_step / 10) * 10) row_step = max(row_step, 10) else: row_step = box[3] - box[1] num_step = int(np.ceil((box[3] - box[1]) / (row_step * lks_y))) for i in range(num_step): r0 = box[1] + row_step * lks_y * i r1 = box[1] + row_step * lks_y * (i + 1) r1 = min(r1, box[3]) # IO box box_i = (box[0], r0, box[2], r1) box_o = (int((box[0] - box[0]) / lks_x), int((r0 - box[1]) / lks_y), int((box[2] - box[0]) / lks_x), int((r1 - box[1]) / lks_y)) print('box: {}'.format(box_o)) # read / multilook if method == 'nearest': data = readfile.read(infile, datasetName=dsName, box=box_i, xstep=lks_x, ystep=lks_y, print_msg=False)[0] else: data = readfile.read(infile, datasetName=dsName, box=box_i, print_msg=False)[0] data = multilook_data(data, lks_y, lks_x) # output block if data.ndim == 3: block = [0, data.shape[0], box_o[1], box_o[3], box_o[0], box_o[2]] else: block = [box_o[1], box_o[3], box_o[0], box_o[2]] # write if ext in ['.h5', '.he5']: writefile.write_hdf5_block(outfile, data=data, datasetName=dsName, block=block, print_msg=False) else: dsDict[dsName] = data # for binary file with 2 bands, always use BIL scheme if (len(dsDict.keys()) == 2 and os.path.splitext(infile)[1] not in ['.h5', '.he5'] and atr.get('scheme', 'BIL').upper() != 'BIL'): print('the input binary file has 2 bands with band interleave
as: {}'. format(atr['scheme'])) print( 'for the output binary file, change the band interleave to BIL as default.' ) atr['scheme'] = 'BIL' if ext not in ['.h5', '.he5']: writefile.write(dsDict, out_file=outfile, metadata=atr, ref_file=infile) # write extra metadata files for ISCE data files if os.path.isfile(infile + '.xml') or os.path.isfile(infile + '.aux.xml'): # write ISCE XML file dtype_gdal = readfile.NUMPY2GDAL_DATATYPE[atr['DATA_TYPE']] dtype_isce = readfile.GDAL2ISCE_DATATYPE[dtype_gdal] writefile.write_isce_xml(outfile, width=int(atr['WIDTH']), length=int(atr['LENGTH']), bands=len(dsDict.keys()), data_type=dtype_isce, scheme=atr['scheme'], image_type=atr['FILE_TYPE']) print(f'write file: {outfile}.xml') # write GDAL VRT file if os.path.isfile(infile + '.vrt'): from isceobj.Util.ImageUtil import ImageLib as IML img = IML.loadImage(outfile)[0] img.renderVRT() print(f'write file: {outfile}.vrt') return outfile
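# Editorial sketch (assumption: multilook_data() in 'average' mode behaves like a plain
# block mean; this standalone numpy version is for illustration only and drops any
# trailing rows/columns that do not fill a complete look window):
def _example_multilook_average(data, lks_y, lks_x):
    import numpy as np
    length = data.shape[0] // lks_y * lks_y
    width = data.shape[1] // lks_x * lks_x
    d = np.asarray(data[:length, :width], dtype=np.float32)
    # fold each lks_y-by-lks_x window into its own pair of axes, then average them
    d = d.reshape(length // lks_y, lks_y, width // lks_x, lks_x)
    return d.mean(axis=(1, 3))
#
# e.g. _example_multilook_average(np.arange(48.).reshape(6, 8), lks_y=3, lks_x=2)
# returns a (2, 4) array of 3x2 block means.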