def add_file(fnames, out_file=None):
    """Generate sum of all input files
    Parameters: fnames   : list of str, path/name of input files to be added
                out_file : str, optional, path/name of output file
    Returns:    out_file : str, path/name of output file
    Example:    'mask_all.h5' = add_file(['mask_1.h5','mask_2.h5','mask_3.h5'], 'mask_all.h5')
    """
    # Default output file name
    ext = os.path.splitext(fnames[0])[1]
    if not out_file:
        out_file = os.path.splitext(fnames[0])[0]
        for i in range(1, len(fnames)):
            out_file += '_plus_' + os.path.splitext(os.path.basename(fnames[i]))[0]
        out_file += ext

    atr = readfile.read_attribute(fnames[0])
    dsNames = readfile.get_dataset_list(fnames[0])
    dsDict = {}
    for dsName in dsNames:
        print('adding {} ...'.format(dsName))
        data = readfile.read(fnames[0], datasetName=dsName)[0]
        for i in range(1, len(fnames)):
            d = readfile.read(fnames[i], datasetName=dsName)[0]
            data = add_matrix(data, d)
        dsDict[dsName] = data
    writefile.write(dsDict, out_file=out_file, metadata=atr, ref_file=fnames[0])
    return out_file

def mask_file(fname, mask_file, out_file, inps=None):
    """Mask input fname with mask_file
    Inputs:
        fname/mask_file - str, path of input / mask file
        inps - namespace, including the following options:
               subset_x/y - list of 2 ints, subset in x/y direction
               threshold  - float, threshold/minValue to generate mask
    Output:
        out_file - str, path of output file
    """
    if not inps:
        inps = cmd_line_parse()

    # read mask_file
    mask = readfile.read(mask_file)[0]
    mask = update_mask_with_inps(mask, inps)

    # masking input file
    dsNames = readfile.get_dataset_list(fname)
    maxDigit = max([len(i) for i in dsNames])
    dsDict = {}
    for dsName in dsNames:
        if dsName not in ['coherence']:
            print('masking {d:<{w}} from {f} ...'.format(d=dsName, w=maxDigit, f=fname))
            data = readfile.read(fname, datasetName=dsName, print_msg=False)[0]
            data = mask_matrix(data, mask, fill_value=inps.fill_value)
        dsDict[dsName] = data

    # default output filename
    if not out_file:
        fbase, fext = os.path.splitext(fname)
        out_file = '{}_msk{}'.format(fbase, fext)

    writefile.write(dsDict, out_file=out_file, ref_file=fname)
    return out_file

def file_operation(fname, operator, operand, out_file=None):
    """Mathematical operation of file"""

    # Basic Info
    atr = readfile.read_attribute(fname)
    k = atr['FILE_TYPE']
    print('input is '+k+' file: '+fname)
    print('operation: file %s %f' % (operator, operand))

    # default output filename
    if not out_file:
        if operator in ['+', 'plus', 'add', 'addition']:
            suffix = 'plus'
        elif operator in ['-', 'minus', 'substract', 'substraction']:
            suffix = 'minus'
        elif operator in ['*', 'times', 'multiply', 'multiplication']:
            suffix = 'multiply'
        elif operator in ['/', 'obelus', 'divide', 'division']:
            suffix = 'divide'
        elif operator in ['^', 'pow', 'power']:
            suffix = 'pow'
        else:
            raise ValueError('un-recognized operator: {}'.format(operator))
        out_file = '{}_{}{}{}'.format(os.path.splitext(fname)[0], suffix,
                                      str(operand), os.path.splitext(fname)[1])

    dsNames = readfile.get_dataset_list(fname)
    dsDict = {}
    for dsName in dsNames:
        data = readfile.read(fname, datasetName=dsName)[0]
        data = data_operation(data, operator, operand)
        dsDict[dsName] = data
    writefile.write(dsDict, out_file=out_file, metadata=atr, ref_file=fname)
    return out_file

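# Note: data_operation() used above is defined elsewhere in the module. For
# illustration only, a minimal sketch of what such a helper could look like,
# derived from the operator aliases accepted above (an assumption, not the
# module's actual implementation):
def data_operation_sketch(data, operator, operand):
    """Apply a scalar arithmetic operation to a data matrix (hypothetical helper)."""
    if operator in ['+', 'plus', 'add', 'addition']:
        data = data + operand
    elif operator in ['-', 'minus', 'substract', 'substraction']:
        data = data - operand
    elif operator in ['*', 'times', 'multiply', 'multiplication']:
        data = data * operand
    elif operator in ['/', 'obelus', 'divide', 'division']:
        data = data / operand
    elif operator in ['^', 'pow', 'power']:
        data = data ** operand
    else:
        raise ValueError('un-recognized operator: {}'.format(operator))
    return data
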
def multilook_file(infile, lks_y, lks_x, outfile=None):
    lks_y = int(lks_y)
    lks_x = int(lks_x)

    # input file info
    atr = readfile.read_attribute(infile)
    k = atr['FILE_TYPE']
    print('multilooking {} {} file: {}'.format(atr['PROCESSOR'], k, infile))
    print('number of looks in y / azimuth direction: %d' % lks_y)
    print('number of looks in x / range direction: %d' % lks_x)

    # output file name
    if not outfile:
        if os.getcwd() == os.path.dirname(os.path.abspath(infile)):
            ext = os.path.splitext(infile)[1]
            outfile = os.path.splitext(infile)[0]+'_'+str(lks_y)+'alks_'+str(lks_x)+'rlks'+ext
        else:
            outfile = os.path.basename(infile)
    #print('writing >>> '+outfile)

    # read source data and multilooking
    dsNames = readfile.get_dataset_list(infile)
    maxDigit = max([len(i) for i in dsNames])
    dsDict = dict()
    for dsName in dsNames:
        print('multilooking {d:<{w}} from {f} ...'.format(
            d=dsName, w=maxDigit, f=os.path.basename(infile)))
        data = readfile.read(infile, datasetName=dsName, print_msg=False)[0]
        data = multilook_data(data, lks_y, lks_x)
        dsDict[dsName] = data
    atr = multilook_attribute(atr, lks_y, lks_x)
    writefile.write(dsDict, out_file=outfile, metadata=atr, ref_file=infile)
    return outfile

def run_load_data(self, step_name):
    """Load InSAR stacks into HDF5 files in ./inputs folder.
    It 1) copies auxiliary files into the work directory (for Univ of Miami only)
       2) loads all interferogram stack files into the mintpy/inputs directory.
       3) checks the loading result
       4) adds custom metadata (optional, for HDF-EOS5 format only)
    """
    # 1) copy aux files (optional)
    self._copy_aux_file()

    # 2) loading data
    scp_args = '--template {}'.format(self.templateFile)
    if self.customTemplateFile:
        scp_args += ' {}'.format(self.customTemplateFile)
    if self.projectName:
        scp_args += ' --project {}'.format(self.projectName)
    # run
    print("load_data.py", scp_args)
    mintpy.load_data.main(scp_args.split())
    os.chdir(self.workDir)

    # 3) check loading result
    load_complete, stack_file, geom_file = ut.check_loaded_dataset(self.workDir, print_msg=True)[0:3]

    # 3.1) output waterMask.h5
    water_mask_file = 'waterMask.h5'
    if 'waterMask' in readfile.get_dataset_list(geom_file):
        print('generate {} from {} for convenience'.format(water_mask_file, geom_file))
        if ut.run_or_skip(out_file=water_mask_file, in_file=geom_file) == 'run':
            water_mask, atr = readfile.read(geom_file, datasetName='waterMask')
            atr['FILE_TYPE'] = 'waterMask'
            writefile.write(water_mask, out_file=water_mask_file, metadata=atr)

    # 4) add custom metadata (optional)
    if self.customTemplateFile:
        print('updating {}, {} metadata based on custom template file: {}'.format(
            os.path.basename(stack_file),
            os.path.basename(geom_file),
            os.path.basename(self.customTemplateFile)))
        # use ut.add_attribute() instead of add_attribute.py because of
        # better control of special metadata, such as SUBSET_X/YMIN
        ut.add_attribute(stack_file, self.customTemplate)
        ut.add_attribute(geom_file, self.customTemplate)

    # 5) if not load_complete, plot and raise exception
    if not load_complete:
        # plot result if error occurred
        self.plot_result(print_aux=False, plot=plot)

        # go back to original directory
        print('Go back to directory:', self.cwd)
        os.chdir(self.cwd)

        # raise error
        msg = 'step {}: NOT all required dataset found, exit.'.format(step_name)
        raise RuntimeError(msg)
    return

def plot_data():
    atr = readfile.read_attribute(file.get())
    file_type = atr['FILE_TYPE']
    datasets = readfile.get_dataset_list(file.get(), file_type)
    item = tree.focus()
    the_item = tree.item(item)
    epoch_num = the_item['text']
    if epoch_num in datasets:
        view.main([file.get(), epoch_num])

def get_geometry_file(dset_list, work_dir=None, coord='geo', abspath=True, print_msg=True):
    """Find geometry file containing input specific dataset"""
    if isinstance(dset_list, str):
        dset_list = [dset_list]
    for dset in dset_list:
        if dset not in geometryDatasetNames:
            raise ValueError('unrecognized geometry dataset name: {}'.format(dset))

    if not work_dir:
        work_dir = os.getcwd()

    # search *geometry*.h5 files
    fname_list = [os.path.join(work_dir, i)
                  for i in ['*geometry*.h5', '*/*geometry*.h5', '../*/geometry*.h5']]
    fname_list = get_file_list(fname_list, coord=coord)
    if len(fname_list) == 0:
        if print_msg:
            print('No geometry file found.')
        return None

    # check dset in the existing h5 files
    # use list() as a temp copy to handle the varying list during the loop
    for fname in list(fname_list):
        if any(dset not in readfile.get_dataset_list(fname) for dset in dset_list):
            fname_list.remove(fname)
    if len(fname_list) == 0:
        if print_msg:
            print('No geometry file with dataset {} found'.format(dset_list))
        return None

    geom_file = fname_list[0]
    if abspath:
        geom_file = os.path.abspath(geom_file)
    return geom_file

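# Hypothetical usage of get_geometry_file() above: find a geocoded geometry
# file under the current directory that contains the 'height' dataset
# ('height' is assumed here to be one of the entries in geometryDatasetNames):
#
#   geom_file = get_geometry_file(['height'], coord='geo')
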
def get_bounding_box(meta, geom_file=None):
    """Get lat/lon range (roughly), in the same order of data file
    lat0/lon0 - starting latitude/longitude (first row/column)
    lat1/lon1 - ending latitude/longitude (last row/column)
    """
    length, width = int(meta['LENGTH']), int(meta['WIDTH'])

    if 'Y_FIRST' in meta.keys():
        # geo coordinates
        lat0 = float(meta['Y_FIRST'])
        lon0 = float(meta['X_FIRST'])
        lat_step = float(meta['Y_STEP'])
        lon_step = float(meta['X_STEP'])
        lat1 = lat0 + lat_step * (length - 1)
        lon1 = lon0 + lon_step * (width - 1)
    else:
        # radar coordinates
        if geom_file and os.path.isfile(geom_file):
            geom_dset_list = readfile.get_dataset_list(geom_file)
        else:
            geom_dset_list = []

        if 'latitude' in geom_dset_list:
            lats = readfile.read(geom_file, datasetName='latitude')[0]
            lons = readfile.read(geom_file, datasetName='longitude')[0]
            lats[lats == 0] = np.nan
            lons[lons == 0] = np.nan
            lat0 = np.nanmin(lats)
            lat1 = np.nanmax(lats)
            lon0 = np.nanmin(lons)
            lon1 = np.nanmax(lons)
        else:
            lats = [float(meta['LAT_REF{}'.format(i)]) for i in [1, 2, 3, 4]]
            lons = [float(meta['LON_REF{}'.format(i)]) for i in [1, 2, 3, 4]]
            lat0 = np.mean(lats[0:2])
            lat1 = np.mean(lats[2:4])
            lon0 = np.mean(lons[0:3:2])
            lon1 = np.mean(lons[1:4:2])
    return lat0, lat1, lon0, lon1

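# A quick numeric check of the geo-coordinate branch in get_bounding_box()
# above, using a made-up metadata dict (values are for illustration only):
#
#   meta = {'LENGTH': '100', 'WIDTH': '200',
#           'Y_FIRST': '34.0', 'X_FIRST': '-118.0',
#           'Y_STEP': '-0.001', 'X_STEP': '0.001'}
#   get_bounding_box(meta)
#   # lat1 = 34.0 + (-0.001) * 99 = 33.901
#   # lon1 = -118.0 + 0.001 * 199 = -117.801
#   # --> (34.0, 33.901, -118.0, -117.801)
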
def mask_file(fname, mask_file, out_file, inps=None):
    """Mask input fname with mask_file
    Inputs:
        fname/mask_file - str, path of input / mask file
        inps - namespace, including the following options:
               subset_x/y - list of 2 ints, subset in x/y direction
               threshold  - float, threshold/minValue to generate mask
    Output:
        out_file - str, path of output file
    """
    if not inps:
        inps = cmd_line_parse()

    # read mask_file
    mask = readfile.read(mask_file)[0]
    mask = update_mask_with_inps(mask, inps)

    # masking input file
    dsNames = readfile.get_dataset_list(fname)
    maxDigit = max([len(i) for i in dsNames])
    dsDict = {}
    for dsName in dsNames:
        if dsName not in ['coherence']:
            print('masking {d:<{w}} from {f} ...'.format(d=dsName, w=maxDigit, f=fname))
            data = readfile.read(fname, datasetName=dsName, print_msg=False)[0]
            data = mask_matrix(data, mask, fill_value=inps.fill_value)
        else:
            data = readfile.read(fname, datasetName=dsName, print_msg=False)[0]
            data = mask_matrix(data, mask, fill_value=0)
        dsDict[dsName] = data

    # default output filename
    if not out_file:
        fbase, fext = os.path.splitext(fname)
        out_file = '{}_msk{}'.format(fbase, fext)

    writefile.write(dsDict, out_file=out_file, ref_file=fname)
    return out_file

def run_geocode(self, step_name):
    """geocode data files in radar coordinates into ./geo folder."""
    if self.template['mintpy.geocode']:
        ts_file = self.get_timeseries_filename(self.template)[step_name]['input']
        atr = readfile.read_attribute(ts_file)
        if 'Y_FIRST' not in atr.keys():
            # 1. geocode
            out_dir = os.path.join(self.workDir, 'geo')
            if not os.path.isdir(out_dir):
                os.makedirs(out_dir)
                print('create directory:', out_dir)

            geom_file, lookup_file = ut.check_loaded_dataset(self.workDir, print_msg=False)[2:4]
            in_files = [geom_file, 'temporalCoherence.h5', ts_file, 'velocity.h5']
            scp_args = '-l {l} -t {t} --outdir {o} --update '.format(
                l=lookup_file, t=self.templateFile, o=out_dir)
            for in_file in in_files:
                scp_args += ' {}'.format(in_file)
            print('geocode.py', scp_args)
            mintpy.geocode.main(scp_args.split())

            # 2. generate reliable pixel mask in geo coordinate
            geom_file = os.path.join(out_dir, 'geo_{}'.format(os.path.basename(geom_file)))
            tcoh_file = os.path.join(out_dir, 'geo_temporalCoherence.h5')
            mask_file = os.path.join(out_dir, 'geo_maskTempCoh.h5')
            tcoh_min = self.template['mintpy.networkInversion.minTempCoh']

            scp_args = '{} -m {} -o {}'.format(tcoh_file, tcoh_min, mask_file)
            # exclude pixels in shadow if shadowMask dataset is available
            if 'shadowMask' in readfile.get_dataset_list(geom_file):
                scp_args += ' --base {} --base-dataset shadowMask --base-value 1'.format(geom_file)
            print('generate_mask.py', scp_args)
            if ut.run_or_skip(out_file=mask_file, in_file=tcoh_file) == 'run':
                mintpy.generate_mask.main(scp_args.split())
        else:
            print('dataset is geocoded, skip geocoding and continue.')
    else:
        print('geocoding is OFF')
    return

def get_geometry_file(dset, geocoded=False, abspath=True, print_msg=True):
    """Find geometry file containing input specific dataset"""
    if dset not in geometryDatasetNames:
        raise ValueError('unrecognized geometry dataset name: {}'.format(dset))

    if geocoded:
        geom_file = './inputs/geometryGeo.h5'
    else:
        geom_file = './inputs/geometryRadar.h5'

    if not os.path.isfile(geom_file):
        print('geometry file {} does not exist.'.format(geom_file))
        return None
    if dset not in readfile.get_dataset_list(geom_file):
        print('dataset {} not found in file {}'.format(dset, geom_file))
        return None

    if abspath:
        geom_file = os.path.abspath(geom_file)
    return geom_file

def add_file(fnames, out_file=None):
    """Generate sum of all input files
    Parameters: fnames   : list of str, path/name of input files to be added
                out_file : str, optional, path/name of output file
    Returns:    out_file : str, path/name of output file
    Example:    'mask_all.h5' = add_file(['mask_1.h5','mask_2.h5','mask_3.h5'], 'mask_all.h5')
    """
    # Default output file name
    ext = os.path.splitext(fnames[0])[1]
    if not out_file:
        out_file = os.path.splitext(fnames[0])[0]
        for i in range(1, len(fnames)):
            out_file += '_plus_' + os.path.splitext(os.path.basename(fnames[i]))[0]
        out_file += ext

    dsDict = {}
    dsNames = readfile.get_dataset_list(fnames[0])
    for dsName in dsNames:
        # ignore dsName if input file has single dataset
        if len(dsNames) == 1:
            dsName2read = None
        else:
            dsName2read = dsName

        print('adding {} ...'.format(dsName))
        data = readfile.read(fnames[0], datasetName=dsName2read)[0]
        for i in range(1, len(fnames)):
            d = readfile.read(fnames[i], datasetName=dsName2read)[0]
            data = add_matrix(data, d)
        dsDict[dsName] = data

    # output
    atr = readfile.read_attribute(fnames[0])
    print('use metadata from the 1st file: {}'.format(fnames[0]))
    writefile.write(dsDict, out_file=out_file, metadata=atr, ref_file=fnames[0])
    return out_file

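# Hypothetical usage of add_file() above (file names are placeholders),
# relying on the default output naming logic:
#
#   add_file(['mask_1.h5', 'mask_2.h5', 'mask_3.h5'])
#   # --> writes mask_1_plus_mask_2_plus_mask_3.h5
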
def multilook_file(infile, lks_y, lks_x, outfile=None):
    lks_y = int(lks_y)
    lks_x = int(lks_x)

    # input file info
    atr = readfile.read_attribute(infile)
    k = atr['FILE_TYPE']
    print('multilooking {} {} file: {}'.format(atr['PROCESSOR'], k, infile))
    print('number of looks in y / azimuth direction: %d' % lks_y)
    print('number of looks in x / range direction: %d' % lks_x)

    # output file name
    if not outfile:
        if os.getcwd() == os.path.dirname(os.path.abspath(infile)):
            ext = os.path.splitext(infile)[1]
            outfile = os.path.splitext(infile)[0] + '_' + str(lks_y) + 'alks_' + str(lks_x) + 'rlks' + ext
        else:
            outfile = os.path.basename(infile)
    #print('writing >>> '+outfile)

    # read source data and multilooking
    dsNames = readfile.get_dataset_list(infile)
    maxDigit = max([len(i) for i in dsNames])
    dsDict = dict()
    for dsName in dsNames:
        print('multilooking {d:<{w}} from {f} ...'.format(
            d=dsName, w=maxDigit, f=os.path.basename(infile)))
        data = readfile.read(infile, datasetName=dsName, print_msg=False)[0]

        # keep timeseries data as 3D matrix when there is only one acquisition
        # because readfile.read() will squeeze it to 2D
        if atr['FILE_TYPE'] == 'timeseries' and len(data.shape) == 2:
            data = np.reshape(data, (1, data.shape[0], data.shape[1]))

        data = multilook_data(data, lks_y, lks_x)
        dsDict[dsName] = data
    atr = multilook_attribute(atr, lks_y, lks_x)
    writefile.write(dsDict, out_file=outfile, metadata=atr, ref_file=infile)
    return outfile

def generate_ifgram_aux_file(self):
    """Generate auxiliary files from ifgramStack file"""
    stack_file = ut.check_loaded_dataset(self.workDir, print_msg=False)[1]
    dsNames = readfile.get_dataset_list(stack_file)
    mask_file = 'maskConnComp.h5'
    coh_file = 'avgSpatialCoh.h5'
    snr_file = 'avgSpatialSnr.h5'

    # 1) generate mask file from the common connected components
    if any('phase' in i.lower() for i in dsNames):
        scp_args = '{} --nonzero -o {} --update'.format(stack_file, mask_file)
        print('\ngenerate_mask.py', scp_args)
        mintpy.generate_mask.main(scp_args.split())

    # 2) generate average spatial coherence
    if any('phase' in i.lower() for i in dsNames):
        scp_args = '{} --dataset coherence -o {} --update'.format(stack_file, coh_file)
    elif any('offset' in i.lower() for i in dsNames):
        scp_args = '{} --dataset offsetSNR -o {} --update'.format(stack_file, snr_file)
    print('\ntemporal_average.py', scp_args)
    mintpy.temporal_average.main(scp_args.split())
    return

def run_geocode(self, step_name):
    """geocode data files in radar coordinates into ./geo folder."""
    if self.template['mintpy.geocode']:
        ts_file = self.get_timeseries_filename(self.template)[step_name]['input']
        atr = readfile.read_attribute(ts_file)
        if 'Y_FIRST' not in atr.keys():
            # 1. geocode
            out_dir = os.path.join(self.workDir, 'geo')
            os.makedirs(out_dir, exist_ok=True)

            geom_file, lookup_file = ut.check_loaded_dataset(self.workDir, print_msg=False)[2:4]
            in_files = [geom_file, 'temporalCoherence.h5', 'avgSpatialCoh.h5', ts_file, 'velocity.h5']
            iargs = ['-l', lookup_file, '-t', self.templateFile, '--outdir', out_dir, '--update']
            for in_file in in_files:
                iargs += [in_file]
            print('geocode.py', ' '.join(iargs))
            mintpy.geocode.main(iargs)

            # 2. generate reliable pixel mask in geo coordinate
            geom_file = os.path.join(out_dir, 'geo_{}'.format(os.path.basename(geom_file)))
            tcoh_file = os.path.join(out_dir, 'geo_temporalCoherence.h5')
            mask_file = os.path.join(out_dir, 'geo_maskTempCoh.h5')
            tcoh_min = self.template['mintpy.networkInversion.minTempCoh']

            iargs = [tcoh_file, '-m', tcoh_min, '-o', mask_file]
            # exclude pixels in shadow if shadowMask dataset is available
            if (self.template['mintpy.networkInversion.shadowMask'] is True
                    and 'shadowMask' in readfile.get_dataset_list(geom_file)):
                iargs += ['--base', geom_file, '--base-dataset', 'shadowMask', '--base-value', '1']
            print('generate_mask.py', ' '.join(iargs))
            if ut.run_or_skip(out_file=mask_file, in_file=tcoh_file) == 'run':
                mintpy.generate_mask.main(iargs)
        else:
            print('dataset is geocoded, skip geocoding and continue.')
    else:
        print('geocoding is OFF')
    return

def reference_file(inps):
    """Seed input file with option from input namespace
    Return output file name if succeed; otherwise, return None
    """
    if not inps:
        inps = cmd_line_parse([''])
    atr = readfile.read_attribute(inps.file)

    # update_mode
    if (not inps.force
            and inps.ref_y is not None and inps.ref_y == int(atr.get('REF_Y', -999))
            and inps.ref_x is not None and inps.ref_x == int(atr.get('REF_X', -999))):
        print('SAME reference pixel is already selected/saved in file, skip updating.')
        return inps.file

    # Check 1 - stack and its non-nan mask pixel coverage
    # outFile=False --> no avgPhaseVelocity file is generated due to the lack of reference point info.
    # did not use maskConnComp.h5 because not all input dataset has connectComponent info
    if atr['FILE_TYPE'] == 'ifgramStack':
        ds_name = [i for i in readfile.get_dataset_list(inps.file)
                   if i in ['unwrapPhase', 'rangeOffset', 'azimuthOffset']][0]
    else:
        ds_name = None
    stack = ut.temporal_average(inps.file, datasetName=ds_name, updateMode=True, outFile=False)[0]
    mask = np.multiply(~np.isnan(stack), stack != 0.)
    if np.nansum(mask) == 0.0:
        raise ValueError('no pixel found with valid phase value in all datasets.')

    # Check 2 - input ref_y/x: location and validity
    if inps.ref_y is not None and inps.ref_x is not None:
        if mask[inps.ref_y, inps.ref_x] == 0.:
            raise ValueError('reference y/x have nan value in some dataset. Please re-select.')
    else:
        # Find reference y/x
        if inps.method == 'maxCoherence':
            inps.ref_y, inps.ref_x = select_max_coherence_yx(
                coh_file=inps.coherenceFile,
                mask=mask,
                min_coh=inps.minCoherence)
        elif inps.method == 'random':
            inps.ref_y, inps.ref_x = random_select_reference_yx(mask)
        elif inps.method == 'manual':
            inps = manual_select_reference_yx(stack, inps, mask)

        # Check ref_y/x from auto method
        if inps.ref_y is None or inps.ref_x is None:
            raise ValueError('ERROR: no reference y/x found.')

    # Seeding file with reference y/x
    atrNew = reference_point_attribute(atr, y=inps.ref_y, x=inps.ref_x)
    if not inps.write_data:
        print('Add/update ref_x/y attribute to file: ' + inps.file)
        print(atrNew)
        inps.outfile = ut.add_attribute(inps.file, atrNew)
    else:
        if not inps.outfile:
            inps.outfile = inps.file

        k = atr['FILE_TYPE']
        fext = os.path.splitext(inps.file)[1]

        if fext == '.h5':
            if inps.outfile == inps.file:
                print('updating data value without re-writing to a new file')

                if k == 'ifgramStack':
                    with h5py.File(inps.file, 'r+') as f:
                        ds = f['unwrapPhase']
                        for i in range(ds.shape[0]):
                            ds[i, :, :] -= ds[i, inps.ref_y, inps.ref_x]

                        print('update metadata')
                        f.attrs.update(atrNew)
                else:
                    with h5py.File(inps.file, 'r+') as f:
                        ds = f[k]
                        if len(ds.shape) == 3:
                            # 3D matrix
                            for i in range(ds.shape[0]):
                                ds[i, :, :] -= ds[i, inps.ref_y, inps.ref_x]
                        else:
                            # 2D matrix
                            ds[:] -= ds[inps.ref_y, inps.ref_x]

                        print('update metadata')
                        f.attrs.update(atrNew)
            else:
                ## write to a new file
                print('writing the referenced data into file: {}'.format(inps.outfile))

                # 1. read and update data value
                data, atr = readfile.read(inps.file)
                if len(data.shape) == 3:
                    # 3D matrix
                    for i in range(data.shape[0]):
                        data[i, :, :] -= data[i, inps.ref_y, inps.ref_x]
                else:
                    # 2D matrix
                    data -= data[inps.ref_y, inps.ref_x]

                # 2. update metadata
                atr.update(atrNew)

                # 3. write to file
                writefile.write(data, inps.outfile, metadata=atr, ref_file=inps.file)
        else:
            # for binary file, over-write directly
            dis_names = ['phase', 'displacement']
            ds_names = readfile.get_dataset_list(inps.file)
            ds_dict = {}
            for ds_name in ds_names:
                data = readfile.read(inps.file, datasetName=ds_name)[0]
                if ds_name in dis_names:
                    data -= data[inps.ref_y, inps.ref_x]
                else:
                    print(f"skip spatial referencing for {ds_name}, as it's not in {dis_names}")
                ds_dict[ds_name] = data
            atr.update(atrNew)
            writefile.write(ds_dict, out_file=inps.outfile, metadata=atr)

    ut.touch([inps.coherenceFile, inps.maskFile])
    return inps.outfile

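# reference_point_attribute() used above is defined elsewhere. As an assumption
# based on the REF_Y/REF_X usage in reference_file(), a minimal sketch of the
# metadata it plausibly returns (hypothetical, not the actual implementation):
def reference_point_attribute_sketch(atr, y, x):
    """Return metadata dict for the reference pixel at row y / column x."""
    atrNew = {'REF_Y': str(y), 'REF_X': str(x)}
    # add lat/lon of the reference pixel for geocoded files
    if 'Y_FIRST' in atr.keys():
        atrNew['REF_LAT'] = str(float(atr['Y_FIRST']) + y * float(atr['Y_STEP']))
        atrNew['REF_LON'] = str(float(atr['X_FIRST']) + x * float(atr['X_STEP']))
    return atrNew
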
def ifgram_inversion_patch(ifgram_file, box=None, ref_phase=None, obs_ds_name='unwrapPhase',
                           weight_func='var', water_mask_file=None, min_norm_velocity=True,
                           mask_ds_name=None, mask_threshold=0.4, min_redundancy=1.0):
    """Invert one patch of an ifgram stack into timeseries.

    Parameters: box               - tuple of 4 int, indicating (x0, y0, x1, y1) of the area of interest
                                    or None for the whole image
                ifgram_file       - str, interferograms stack HDF5 file, e.g. ./inputs/ifgramStack.h5
                ref_phase         - 1D array in size of (num_ifgram), or None
                obs_ds_name       - str, dataset to feed the inversion.
                weight_func       - str, weight function, choose in ['no', 'fim', 'var', 'coh']
                water_mask_file   - str, water mask filename if available, to skip inversion on water
                min_norm_velocity - bool, minimize the residual phase or phase velocity
                mask_ds_name      - str, dataset name in ifgram_file used to mask unwrapPhase pixel-wise
                mask_threshold    - float, min coherence of pixels if mask_dataset_name='coherence'
                min_redundancy    - float, the min number of ifgrams for every acquisition.
    Returns:    ts                - 3D array in size of (num_date, num_row, num_col)
                temp_coh          - 2D array in size of (num_row, num_col)
                num_inv_ifg       - 2D array in size of (num_row, num_col)
                box               - tuple of 4 int
    Example:    ifgram_inversion_patch('ifgramStack.h5', box=(0,200,1316,400))
    """
    stack_obj = ifgramStack(ifgram_file)
    stack_obj.open(print_msg=False)

    # debug
    #y, x = 258, 454
    #box = (x, y, x+1, y+1)

    ## 1. input info

    # size
    if box:
        num_row = box[3] - box[1]
        num_col = box[2] - box[0]
    else:
        num_row = stack_obj.length
        num_col = stack_obj.width
    num_pixel = num_row * num_col

    # get tbase_diff
    date_list = stack_obj.get_date_list(dropIfgram=True)
    num_date = len(date_list)
    tbase = np.array(ptime.date_list2tbase(date_list)[0], np.float32) / 365.25
    tbase_diff = np.diff(tbase).reshape(-1, 1)

    # design matrix
    date12_list = stack_obj.get_date12_list(dropIfgram=True)
    A, B = stack_obj.get_design_matrix4timeseries(date12_list=date12_list)[0:2]

    # prep for decor std time-series
    #if os.path.isfile('reference_date.txt'):
    #    ref_date = str(np.loadtxt('reference_date.txt', dtype=bytes).astype(str))
    #else:
    #    ref_date = date_list[0]
    #Astd = stack_obj.get_design_matrix4timeseries(date12_list=date12_list, refDate=ref_date)[0]
    #ref_idx = date_list.index(ref_date)
    #time_idx = [i for i in range(num_date)]
    #time_idx.remove(ref_idx)

    # skip zero value in the network inversion for phase
    if 'phase' in obs_ds_name.lower():
        skip_zero_value = True
    else:
        skip_zero_value = False

    # 1.1 read / calculate weight
    if weight_func in ['no', 'sbas']:
        weight = None
    else:
        weight = calc_weight(stack_obj,
                             box,
                             weight_func=weight_func,
                             dropIfgram=True,
                             chunk_size=100000)

    # 1.2 read / mask unwrapPhase / offset
    pha_data = read_unwrap_phase(stack_obj,
                                 box,
                                 ref_phase,
                                 obs_ds_name=obs_ds_name,
                                 dropIfgram=True)
    pha_data = mask_unwrap_phase(pha_data,
                                 stack_obj,
                                 box,
                                 dropIfgram=True,
                                 mask_ds_name=mask_ds_name,
                                 mask_threshold=mask_threshold)

    # 1.3 mask of pixels to invert
    mask = np.ones(num_pixel, np.bool_)

    # 1.3.1 - Water Mask
    if water_mask_file:
        print('skip pixels on water with mask from file: {}'.format(os.path.basename(water_mask_file)))
        atr_msk = readfile.read_attribute(water_mask_file)
        len_msk, wid_msk = int(atr_msk['LENGTH']), int(atr_msk['WIDTH'])
        if (len_msk, wid_msk) != (stack_obj.length, stack_obj.width):
            raise ValueError('Input water mask file has different size from ifgramStack file.')

        dsName = [i for i in readfile.get_dataset_list(water_mask_file)
                  if i in ['waterMask', 'mask']][0]
        waterMask = readfile.read(water_mask_file, datasetName=dsName, box=box)[0].flatten()
        mask *= np.array(waterMask, dtype=np.bool_)
        del waterMask

    # 1.3.2 - Mask for Zero Phase in ALL ifgrams
    if 'phase' in obs_ds_name.lower():
        print('skip pixels with zero/nan value in all interferograms')
        with warnings.catch_warnings():
            # ignore warning message for all-NaN slices
            warnings.simplefilter("ignore", category=RuntimeWarning)
            phase_stack = np.nanmean(pha_data, axis=0)
        mask *= np.multiply(~np.isnan(phase_stack), phase_stack != 0.)
        del phase_stack

    # 1.3.3 invert pixels on mask 1+2
    num_pixel2inv = int(np.sum(mask))
    idx_pixel2inv = np.where(mask)[0]
    print('number of pixels to invert: {} out of {} ({:.1f}%)'.format(
        num_pixel2inv, num_pixel, num_pixel2inv / num_pixel * 100))

    ## 2. inversion

    # 2.1 initialize the output matrices
    ts = np.zeros((num_date, num_pixel), np.float32)
    #ts_std = np.zeros((num_date, num_pixel), np.float32)
    temp_coh = np.zeros(num_pixel, np.float32)
    num_inv_ifg = np.zeros(num_pixel, np.int16)

    # return directly if there is nothing to invert
    if num_pixel2inv < 1:
        ts = ts.reshape(num_date, num_row, num_col)
        #ts_std = ts_std.reshape(num_date, num_row, num_col)
        temp_coh = temp_coh.reshape(num_row, num_col)
        num_inv_ifg = num_inv_ifg.reshape(num_row, num_col)
        return ts, temp_coh, num_inv_ifg, box

    # 2.2 un-weighted inversion (classic SBAS)
    if weight_func in ['no', 'sbas']:

        # a. mask for Non-Zero Phase in ALL ifgrams (share one B in sbas inversion)
        if 'phase' in obs_ds_name.lower():
            mask_all_net = np.all(pha_data, axis=0)
            mask_all_net *= mask
        else:
            mask_all_net = np.array(mask)
        mask_part_net = mask ^ mask_all_net
        del mask

        # b. invert once for all pixels with obs in all ifgrams
        if np.sum(mask_all_net) > 0:
            print(('inverting pixels with valid phase in all ifgrams'
                   ' ({:.0f} pixels) ...').format(np.sum(mask_all_net)))
            tsi, tcohi, num_ifgi = estimate_timeseries(A, B, tbase_diff,
                                                       ifgram=pha_data[:, mask_all_net],
                                                       weight_sqrt=None,
                                                       min_norm_velocity=min_norm_velocity,
                                                       min_redundancy=min_redundancy,
                                                       skip_zero_value=skip_zero_value)
            ts[:, mask_all_net] = tsi
            temp_coh[mask_all_net] = tcohi
            num_inv_ifg[mask_all_net] = num_ifgi

        # c. pixel-by-pixel for pixels with obs not in all ifgrams
        if np.sum(mask_part_net) > 0:
            print(('inverting pixels with valid phase in some ifgrams'
                   ' ({:.0f} pixels) ...').format(np.sum(mask_part_net)))
            num_pixel2inv = int(np.sum(mask_part_net))
            idx_pixel2inv = np.where(mask_part_net)[0]
            prog_bar = ptime.progressBar(maxValue=num_pixel2inv)
            for i in range(num_pixel2inv):
                idx = idx_pixel2inv[i]
                tsi, tcohi, num_ifgi = estimate_timeseries(A, B, tbase_diff,
                                                           ifgram=pha_data[:, idx],
                                                           weight_sqrt=None,
                                                           min_norm_velocity=min_norm_velocity,
                                                           min_redundancy=min_redundancy,
                                                           skip_zero_value=skip_zero_value)
                ts[:, idx] = tsi.flatten()
                temp_coh[idx] = tcohi
                num_inv_ifg[idx] = num_ifgi
                prog_bar.update(i + 1, every=2000, suffix='{}/{} pixels'.format(i + 1, num_pixel2inv))
            prog_bar.close()

    # 2.3 weighted inversion - pixel-by-pixel
    else:
        print('inverting network of interferograms into time-series ...')
        prog_bar = ptime.progressBar(maxValue=num_pixel2inv)
        for i in range(num_pixel2inv):
            idx = idx_pixel2inv[i]
            tsi, tcohi, num_ifgi = estimate_timeseries(A, B, tbase_diff,
                                                       ifgram=pha_data[:, idx],
                                                       weight_sqrt=weight[:, idx],
                                                       min_norm_velocity=min_norm_velocity,
                                                       min_redundancy=min_redundancy,
                                                       skip_zero_value=skip_zero_value)
            ts[:, idx] = tsi.flatten()
            temp_coh[idx] = tcohi
            num_inv_ifg[idx] = num_ifgi
            prog_bar.update(i + 1, every=2000, suffix='{}/{} pixels'.format(i + 1, num_pixel2inv))
        prog_bar.close()

    del weight
    del pha_data

    ## 3. prepare output

    # 3.1 reshape
    ts = ts.reshape(num_date, num_row, num_col)
    #ts_std = ts_std.reshape(num_date, num_row, num_col)
    temp_coh = temp_coh.reshape(num_row, num_col)
    num_inv_ifg = num_inv_ifg.reshape(num_row, num_col)

    # 3.2 convert displacement unit to meter
    if obs_ds_name.startswith('unwrapPhase'):
        phase2range = -1 * float(stack_obj.metadata['WAVELENGTH']) / (4. * np.pi)
        ts *= phase2range
        print('converting LOS phase unit from radian to meter')

    elif obs_ds_name == 'azimuthOffset':
        az_pixel_size = ut.azimuth_ground_resolution(stack_obj.metadata)
        ts *= az_pixel_size
        print('converting azimuth offset unit from pixel ({:.2f} m) to meter'.format(az_pixel_size))

    elif obs_ds_name == 'rangeOffset':
        rg_pixel_size = float(stack_obj.metadata['RANGE_PIXEL_SIZE'])
        ts *= rg_pixel_size
        print('converting range offset unit from pixel ({:.2f} m) to meter'.format(rg_pixel_size))

    return ts, temp_coh, num_inv_ifg, box

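# Quick numeric check of the phase-to-range factor used in step 3.2 above:
# for a C-band sensor like Sentinel-1 (WAVELENGTH ~= 0.0555 m),
#   phase2range = -0.0555 / (4 * pi) ~= -4.42e-3 m/radian,
# i.e. one full 2*pi fringe corresponds to ~2.77 cm (half a wavelength) of
# line-of-sight displacement.
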
def read_network_info(inps):
    ext = os.path.splitext(inps.file)[1]
    print('read temporal/spatial baseline info from file:', inps.file)

    ## 1. Read date, pbase, date12 and coherence
    if ext == '.h5':
        stack_obj = ifgramStack(inps.file)
        stack_obj.open()
        inps.date12List = stack_obj.get_date12_list(dropIfgram=False)
        inps.dateList = stack_obj.get_date_list(dropIfgram=False)
        inps.pbaseList = stack_obj.get_perp_baseline_timeseries(dropIfgram=False)

        if inps.dsetName in readfile.get_dataset_list(inps.file):
            inps.cohList = ut.spatial_average(inps.file,
                                              datasetName=inps.dsetName,
                                              maskFile=inps.maskFile,
                                              saveList=True,
                                              checkAoi=False)[0]
        elif inps.dsetName == 'pbase':
            inps.cohList = np.abs(stack_obj.pbaseIfgram).tolist()
        elif inps.dsetName == 'tbase':
            inps.cohList = stack_obj.tbaseIfgram.tolist()
        else:
            raise ValueError(f'{inps.dsetName} NOT found in file: {inps.file}!')

    elif ext == '.txt':
        inps.date12List = np.loadtxt(inps.file, dtype=bytes).astype(str)[:, 0].tolist()

        # date12List --> dateList
        mDates = [i.split('_')[0] for i in inps.date12List]
        sDates = [i.split('_')[1] for i in inps.date12List]
        inps.dateList = sorted(list(set(mDates + sDates)))

        # pbase12List + date12List --> pbaseList
        pbase12List = np.loadtxt(inps.file, dtype=bytes).astype(float)[:, 3]
        A = ifgramStack.get_design_matrix4timeseries(inps.date12List)[0]
        inps.pbaseList = np.zeros(len(inps.dateList), dtype=np.float32)
        inps.pbaseList[1:] = np.linalg.lstsq(A, np.array(pbase12List), rcond=None)[0]

        # cohList
        inps.cohList = np.loadtxt(inps.file, dtype=bytes).astype(float)[:, 1]
    else:
        raise ValueError('un-recognized input file extension:', ext)

    print('number of acquisitions: {}'.format(len(inps.dateList)))
    print('number of interferograms: {}'.format(len(inps.date12List)))

    print('shift all perp baseline by {} to zero mean for plotting'.format(np.mean(inps.pbaseList)))
    inps.pbaseList -= np.mean(inps.pbaseList)

    # Optional: Read dropped date12 / date
    inps.dateList_drop = []
    inps.date12List_drop = []
    if ext == '.h5':
        inps.date12List_keep = ifgramStack(inps.file).get_date12_list(dropIfgram=True)
        inps.date12List_drop = sorted(list(set(inps.date12List) - set(inps.date12List_keep)))
        print('-' * 50)
        print('number of interferograms marked as drop: {}'.format(len(inps.date12List_drop)))
        print('number of interferograms marked as keep: {}'.format(len(inps.date12List_keep)))

        mDates = [i.split('_')[0] for i in inps.date12List_keep]
        sDates = [i.split('_')[1] for i in inps.date12List_keep]
        inps.dateList_keep = sorted(list(set(mDates + sDates)))
        inps.dateList_drop = sorted(list(set(inps.dateList) - set(inps.dateList_keep)))
        print('number of acquisitions marked as drop: {}'.format(len(inps.dateList_drop)))
        if len(inps.dateList_drop) > 0:
            print(inps.dateList_drop)
    return inps

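# The .txt branch above implies a whitespace-delimited list file with (at least)
# 4 columns, e.g. (values are made up for illustration):
#
#   # date12            coherence   ...     pbase
#   20141213_20141225   0.86        ...     23.5
#
# column 0 is parsed as date12, column 1 as coherence, and column 3 as the
# perpendicular baseline of each interferogram.
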
def run_geocode(inps):
    """geocode all input files"""
    start_time = time.time()

    # feed the largest file for resample object initiation
    ind_max = np.argmax([os.path.getsize(i) for i in inps.file])

    # prepare geometry for geocoding
    res_obj = resample(lut_file=inps.lookupFile,
                       src_file=inps.file[ind_max],
                       SNWE=inps.SNWE,
                       lalo_step=inps.laloStep,
                       interp_method=inps.interpMethod,
                       fill_value=inps.fillValue,
                       nprocs=inps.nprocs,
                       max_memory=inps.maxMemory,
                       software=inps.software,
                       print_msg=True)
    res_obj.open()
    res_obj.prepare()

    # resample input files one by one
    for infile in inps.file:
        print('-' * 50 + '\nresampling file: {}'.format(infile))
        ext = os.path.splitext(infile)[1]
        atr = readfile.read_attribute(infile, datasetName=inps.dset)
        outfile = auto_output_filename(infile, inps)

        # update_mode
        if inps.updateMode:
            print('update mode: ON')
            if ut.run_or_skip(outfile, in_file=[infile, inps.lookupFile]) == 'skip':
                continue

        ## prepare output
        # update metadata
        if inps.radar2geo:
            atr = attr.update_attribute4radar2geo(atr, res_obj=res_obj)
        else:
            atr = attr.update_attribute4geo2radar(atr, res_obj=res_obj)

        # instantiate output file
        file_is_hdf5 = os.path.splitext(infile)[1] in ['.h5', '.he5']
        if file_is_hdf5:
            writefile.layout_hdf5(outfile, metadata=atr, ref_file=infile)
        else:
            dsDict = dict()

        ## run
        dsNames = readfile.get_dataset_list(infile, datasetName=inps.dset)
        maxDigit = max([len(i) for i in dsNames])
        for dsName in dsNames:
            if not file_is_hdf5:
                dsDict[dsName] = np.zeros((res_obj.length, res_obj.width))

            # loop for block-by-block IO
            for i in range(res_obj.num_box):
                src_box = res_obj.src_box_list[i]
                dest_box = res_obj.dest_box_list[i]

                # read
                print('-' * 50 + '\nreading {d:<{w}} in block {b} from {f} ...'.format(
                    d=dsName, w=maxDigit, b=src_box, f=os.path.basename(infile)))
                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=src_box,
                                     print_msg=False)[0]

                # resample
                data = res_obj.run_resample(src_data=data, box_ind=i)

                # write / save block data
                if data.ndim == 3:
                    block = [0, data.shape[0],
                             dest_box[1], dest_box[3],
                             dest_box[0], dest_box[2]]
                else:
                    block = [dest_box[1], dest_box[3],
                             dest_box[0], dest_box[2]]

                if file_is_hdf5:
                    print('write data in block {} to file: {}'.format(block, outfile))
                    writefile.write_hdf5_block(outfile,
                                               data=data,
                                               datasetName=dsName,
                                               block=block,
                                               print_msg=False)
                else:
                    dsDict[dsName][block[0]:block[1], block[2]:block[3]] = data

            # for binary file: ensure same data type
            if not file_is_hdf5:
                dsDict[dsName] = np.array(dsDict[dsName], dtype=data.dtype)

        # write binary file
        if not file_is_hdf5:
            writefile.write(dsDict, out_file=outfile, metadata=atr, ref_file=infile)

    m, s = divmod(time.time() - start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs.\n'.format(m, s))
    return outfile

def generate_temporal_coherence_mask(self):
    """Generate reliable pixel mask from temporal coherence"""
    geom_file = ut.check_loaded_dataset(self.workDir, print_msg=False)[2]
    tcoh_file = 'temporalCoherence.h5'
    mask_file = 'maskTempCoh.h5'
    tcoh_min = self.template['mintpy.networkInversion.minTempCoh']

    scp_args = '{} -m {} -o {}'.format(tcoh_file, tcoh_min, mask_file)
    # exclude pixels in shadow if shadowMask dataset is available
    if 'shadowMask' in readfile.get_dataset_list(geom_file):
        scp_args += ' --base {} --base-dataset shadowMask --base-value 1'.format(geom_file)
    print('generate_mask.py', scp_args)

    # update mode: run only if:
    # 1) output file exists and newer than input file, AND
    # 2) all config keys are the same
    config_keys = ['mintpy.networkInversion.minTempCoh']
    print('update mode: ON')
    flag = 'skip'
    if ut.run_or_skip(out_file=mask_file, in_file=tcoh_file, print_msg=False) == 'run':
        flag = 'run'
    else:
        print('1) output file: {} already exists and newer than input file: {}'.format(mask_file, tcoh_file))
        atr = readfile.read_attribute(mask_file)
        if any(str(self.template[i]) != atr.get(i, 'False') for i in config_keys):
            flag = 'run'
            print('2) NOT all key configuration parameters are the same: {}'.format(config_keys))
        else:
            print('2) all key configuration parameters are the same: {}'.format(config_keys))
    print('run or skip: {}'.format(flag))

    if flag == 'run':
        mintpy.generate_mask.main(scp_args.split())
        # update configKeys
        atr = {}
        for key in config_keys:
            atr[key] = self.template[key]
        ut.add_attribute(mask_file, atr)

    # check number of pixels selected in mask file for following analysis
    num_pixel = np.sum(readfile.read(mask_file)[0] != 0.)
    print('number of reliable pixels: {}'.format(num_pixel))

    min_num_pixel = float(self.template['mintpy.networkInversion.minNumPixel'])
    if num_pixel < min_num_pixel:
        msg = "Not enough reliable pixels (minimum of {}). ".format(int(min_num_pixel))
        msg += "Try the following:\n"
        msg += "1) Check the reference pixel and make sure it's not in areas with unwrapping errors\n"
        msg += "2) Check the network and make sure it's fully connected without subsets"
        raise RuntimeError(msg)
    return

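# For reference, the scp_args string built above corresponds to a command line
# like the following (the threshold and geometry path are example values):
#
#   generate_mask.py temporalCoherence.h5 -m 0.7 -o maskTempCoh.h5 \
#       --base ./inputs/geometryRadar.h5 --base-dataset shadowMask --base-value 1
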
def prepare_los_geometry(geom_file):
    """Prepare LOS geometry data/info in geo-coordinates
    Parameters: geom_file  - str, path of geometry file
    Returns:    inc_angle  - 2D np.ndarray, incidence angle in radians
                head_angle - 2D np.ndarray, heading angle in radians
                atr        - dict, metadata in geo-coordinate
    """
    print('prepare LOS geometry in geo-coordinates from file: {}'.format(geom_file))
    atr = readfile.read_attribute(geom_file)

    print('read incidenceAngle from file: {}'.format(geom_file))
    inc_angle = readfile.read(geom_file, datasetName='incidenceAngle')[0]

    if 'azimuthAngle' in readfile.get_dataset_list(geom_file):
        print('read azimuthAngle from file: {}'.format(geom_file))
        print('convert azimuth angle to heading angle')
        az_angle = readfile.read(geom_file, datasetName='azimuthAngle')[0]
        head_angle = ut.azimuth2heading_angle(az_angle)
    else:
        print('use the HEADING attribute as the mean heading angle')
        head_angle = np.ones(inc_angle.shape, dtype=np.float32) * float(atr['HEADING'])

    # geocode inc/az angle data if in radar-coord
    if 'Y_FIRST' not in atr.keys():
        print('-' * 50)
        print('geocoding the incidence / heading angles ...')
        res_obj = resample(lut_file=geom_file, src_file=geom_file)
        res_obj.open()
        res_obj.prepare()

        # resample data
        box = res_obj.src_box_list[0]
        inc_angle = res_obj.run_resample(src_data=inc_angle[box[1]:box[3], box[0]:box[2]])
        head_angle = res_obj.run_resample(src_data=head_angle[box[1]:box[3], box[0]:box[2]])

        # update attribute
        atr = attr.update_attribute4radar2geo(atr, res_obj=res_obj)

    # for 'Y_FIRST' not in 'degree'
    # e.g. meters for UTM projection from ASF HyP3
    if not atr['Y_UNIT'].lower().startswith('deg'):
        # get SNWE in meter
        length, width = int(atr['LENGTH']), int(atr['WIDTH'])
        N = float(atr['Y_FIRST'])
        W = float(atr['X_FIRST'])
        y_step = float(atr['Y_STEP'])
        x_step = float(atr['X_STEP'])
        S = N + y_step * length
        E = W + x_step * width

        # SNWE in meter --> degree
        lat0, lon0 = ut.to_latlon(atr['OG_FILE_PATH'], W, N)
        lat1, lon1 = ut.to_latlon(atr['OG_FILE_PATH'], E, S)
        lat_step = (lat1 - lat0) / length
        lon_step = (lon1 - lon0) / width

        # update Y/X_FIRST/STEP/UNIT
        atr['Y_FIRST'] = lat0
        atr['X_FIRST'] = lon0
        atr['Y_STEP'] = lat_step
        atr['X_STEP'] = lon_step
        atr['Y_UNIT'] = 'degrees'
        atr['X_UNIT'] = 'degrees'

    # unit: degree to radian
    inc_angle *= np.pi / 180.
    head_angle *= np.pi / 180.

    return inc_angle, head_angle, atr

def subset_file(fname, subset_dict_input, out_file=None):
    """Subset file with subset_dict
    Inputs:
        fname       : str, path/name of file
        out_file    : str, path/name of output file
        subset_dict : dict, subset parameter, including the following items:
                      subset_x   : list of 2 int,   subset in x direction,   default=None
                      subset_y   : list of 2 int,   subset in y direction,   default=None
                      subset_lat : list of 2 float, subset in lat direction, default=None
                      subset_lon : list of 2 float, subset in lon direction, default=None
                      fill_value : float, optional. filled value for area outside of data coverage. default=None
                                   None/not-existed to subset within data coverage only.
                      tight      : bool, tight subset or not, for lookup table file, i.e. geomap*.trans
    Outputs:
        out_file    : str, path/name of output file;
                      out_file = 'subset_'+fname, if fname is in current directory;
                      out_file = fname, if fname is not in the current directory.
    """
    # Input File Info
    try:
        atr = readfile.read_attribute(fname)
    except:
        return None
    width = int(atr['WIDTH'])
    length = int(atr['LENGTH'])
    k = atr['FILE_TYPE']
    print('subset ' + k + ' file: ' + fname + ' ...')

    subset_dict = subset_dict_input.copy()
    # Read Subset Inputs into 4-tuple box in pixel and geo coord
    pix_box, geo_box = subset_input_dict2box(subset_dict, atr)
    coord = ut.coordinate(atr)

    # if fill_value exists and not None, subset data and fill assigned value for area out of its coverage.
    # otherwise, re-check subset to make sure it's within data coverage and initialize the matrix with np.nan
    if 'fill_value' in subset_dict.keys() and subset_dict['fill_value']:
        outfill = True
    else:
        outfill = False
    if not outfill:
        pix_box = coord.check_box_within_data_coverage(pix_box)
        subset_dict['fill_value'] = np.nan

    geo_box = coord.box_pixel2geo(pix_box)
    data_box = (0, 0, width, length)
    print('data   range in y/x: ' + str(data_box))
    print('subset range in y/x: ' + str(pix_box))
    print('data   range in lat/lon: ' + str(coord.box_pixel2geo(data_box)))
    print('subset range in lat/lon: ' + str(geo_box))

    if pix_box == data_box:
        print('Subset range == data coverage, no need to subset. Skip.')
        return fname

    # Calculate Subset/Overlap Index
    pix_box4data, pix_box4subset = get_box_overlap_index(data_box, pix_box)

    ########################### Data Read and Write ######################
    # Output File Name
    if not out_file:
        if os.getcwd() == os.path.dirname(os.path.abspath(fname)):
            if 'tight' in subset_dict.keys() and subset_dict['tight']:
                out_file = '{}_tight{}'.format(os.path.splitext(fname)[0],
                                               os.path.splitext(fname)[1])
            else:
                out_file = 'subset_' + os.path.basename(fname)
        else:
            out_file = os.path.basename(fname)
    print('writing >>> ' + out_file)

    # subset datasets one by one
    dsNames = readfile.get_dataset_list(fname)
    maxDigit = max([len(i) for i in dsNames])
    dsDict = dict()
    for dsName in dsNames:
        print('subsetting {d:<{w}} from {f} ...'.format(
            d=dsName, w=maxDigit, f=os.path.basename(fname)))
        data = readfile.read(fname, datasetName=dsName, print_msg=False)[0]

        # subset 2D data
        if len(data.shape) == 2:
            data_overlap = data[pix_box4data[1]:pix_box4data[3],
                                pix_box4data[0]:pix_box4data[2]]
            data = np.ones((pix_box[3] - pix_box[1],
                            pix_box[2] - pix_box[0]), data.dtype) * subset_dict['fill_value']
            data[pix_box4subset[1]:pix_box4subset[3],
                 pix_box4subset[0]:pix_box4subset[2]] = data_overlap

        # subset 3D data
        elif len(data.shape) == 3:
            data_overlap = data[:,
                                pix_box4data[1]:pix_box4data[3],
                                pix_box4data[0]:pix_box4data[2]]
            data = np.ones((data.shape[0],
                            pix_box[3] - pix_box[1],
                            pix_box[2] - pix_box[0]), data.dtype) * subset_dict['fill_value']
            data[:,
                 pix_box4subset[1]:pix_box4subset[3],
                 pix_box4subset[0]:pix_box4subset[2]] = data_overlap

        dsDict[dsName] = data

    atr = ut.subset_attribute(atr, pix_box)
    writefile.write(dsDict, out_file=out_file, metadata=atr, ref_file=fname)
    return out_file

def run_geocode(inps):
    """geocode all input files"""
    start_time = time.time()

    # Prepare geometry for geocoding
    res_obj = resample(lookupFile=inps.lookupFile,
                       dataFile=inps.file[0],
                       SNWE=inps.SNWE,
                       laloStep=inps.laloStep,
                       processor=inps.processor)
    res_obj.open()

    # resample input files one by one
    for infile in inps.file:
        print('-' * 50 + '\nresampling file: {}'.format(infile))
        ext = os.path.splitext(infile)[1]
        atr = readfile.read_attribute(infile, datasetName=inps.dset)
        outfile = auto_output_filename(infile, inps)
        if inps.updateMode and ut.run_or_skip(outfile, in_file=[infile, inps.lookupFile]) == 'skip':
            print('update mode is ON, skip geocoding.')
            continue

        # read source data and resample
        dsNames = readfile.get_dataset_list(infile, datasetName=inps.dset)
        maxDigit = max([len(i) for i in dsNames])
        dsResDict = dict()
        for dsName in dsNames:
            print('reading {d:<{w}} from {f} ...'.format(
                d=dsName, w=maxDigit, f=os.path.basename(infile)))
            if ext in ['.h5', '.he5']:
                data = readfile.read(infile, datasetName=dsName, print_msg=False)[0]
            else:
                data, atr = readfile.read(infile, datasetName=dsName, print_msg=False)

            # keep timeseries data as 3D matrix when there is only one acquisition
            # because readfile.read() will squeeze it to 2D
            if atr['FILE_TYPE'] == 'timeseries' and len(data.shape) == 2:
                data = np.reshape(data, (1, data.shape[0], data.shape[1]))

            res_data = res_obj.run_resample(src_data=data,
                                            interp_method=inps.interpMethod,
                                            fill_value=inps.fillValue,
                                            nprocs=inps.nprocs,
                                            print_msg=True)
            dsResDict[dsName] = res_data

        # update metadata
        if inps.radar2geo:
            atr = metadata_radar2geo(atr, res_obj)
        else:
            atr = metadata_geo2radar(atr, res_obj)
        #if len(dsNames) == 1 and dsName not in ['timeseries']:
        #    atr['FILE_TYPE'] = dsNames[0]
        #    infile = None

        writefile.write(dsResDict, out_file=outfile, metadata=atr, ref_file=infile)

    m, s = divmod(time.time() - start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs.\n'.format(m, s))
    return outfile

def add_file(fnames, out_file=None, force=False):
    """Generate sum of all input files
    Parameters: fnames   : list of str, path/name of input files to be added
                out_file : str, optional, path/name of output file
                force    : bool, sum time-series files with different dates
                           forcibly, using their shared dates only
    Returns:    out_file : str, path/name of output file
    Example:    'mask_all.h5' = add_file(['mask_1.h5','mask_2.h5','mask_3.h5'], 'mask_all.h5')
    """
    # Default output file name
    ext = os.path.splitext(fnames[0])[1]
    if not out_file:
        out_file = os.path.splitext(fnames[0])[0]
        for i in range(1, len(fnames)):
            out_file += '_plus_' + os.path.splitext(os.path.basename(fnames[i]))[0]
        out_file += ext

    # read FILE_TYPE
    ftypes = [readfile.read_attribute(x)['FILE_TYPE'] for x in fnames]
    print(f'input file types: {ftypes}')

    if ftypes[0] == 'timeseries':
        # check dates shared by two timeseries files
        file1, file2 = fnames[0], fnames[1]
        atr1 = readfile.read_attribute(file1)
        atr2 = readfile.read_attribute(file2)
        dateList1 = timeseries(file1).get_date_list()
        dateList2 = timeseries(file2).get_date_list()
        dateListShared = [i for i in dateList1 if i in dateList2]
        dateShared = np.ones((len(dateList1)), dtype=np.bool_)
        if dateListShared != dateList1:
            print('WARNING: {} does not contain all dates in {}'.format(file2, file1))
            if force:
                dateListEx = list(set(dateList1) - set(dateListShared))
                print('Continue and enforce the summation for their shared dates only.')
                print('\twith following dates are ignored for the summation:\n{}'.format(dateListEx))
                dateShared[np.array([dateList1.index(i) for i in dateListEx])] = 0
            else:
                raise Exception('To enforce the summation anyway, use --force option.')

        # check reference point
        ref_date, ref_y, ref_x = check_reference(atr1, atr2)

        # read data2 (consider different reference_date/pixel)
        print('read from file: {}'.format(file2))
        data2 = readfile.read(file2, datasetName=dateListShared)[0]

        if ref_y and ref_x:
            print('* referencing data from {} to y/x: {}/{}'.format(os.path.basename(file2), ref_y, ref_x))
            ref_box = (ref_x, ref_y, ref_x + 1, ref_y + 1)
            ref_val = readfile.read(file2, datasetName=dateListShared, box=ref_box)[0]
            data2 -= np.tile(ref_val.reshape(-1, 1, 1), (1, data2.shape[1], data2.shape[2]))

        if ref_date:
            print('* referencing data from {} to date: {}'.format(os.path.basename(file2), ref_date))
            ref_ind = dateListShared.index(ref_date)
            data2 -= np.tile(data2[ref_ind, :, :], (data2.shape[0], 1, 1))

        # read data1
        print('read from file: {}'.format(file1))
        data = readfile.read(file1)[0]

        # apply adding
        mask = data == 0.
        data[dateShared] += data2
        data[mask] = 0.  # Do not change zero phase value
        del data2

        # write file
        writefile.write(data, out_file=out_file, metadata=atr1, ref_file=file1)

    else:
        # get common dataset list
        ds_names_list = [readfile.get_dataset_list(x) for x in fnames]
        ds_names = list(set.intersection(*map(set, ds_names_list)))
        # if all files have one dataset, ignore dataset name variation and take the 1st one as reference
        if all(len(x) == 1 for x in ds_names_list):
            ds_names = ds_names_list[0]
        print('List of common datasets across files: ', ds_names)
        if len(ds_names) < 1:
            raise ValueError('No common datasets found among files:\n{}'.format(fnames))

        # loop over each file
        dsDict = {}
        for ds_name in ds_names:
            print('adding {} ...'.format(ds_name))
            data, atr = readfile.read(fnames[0], datasetName=ds_name)

            for i, fname in enumerate(fnames[1:]):
                # ignore ds_name if input file has single dataset
                ds_name2read = None if len(ds_names_list[i+1]) == 1 else ds_name
                # read
                data2 = readfile.read(fname, datasetName=ds_name2read)[0]
                # apply operation
                data = add_matrix(data, data2)
            dsDict[ds_name] = data

        # output
        print('use metadata from the 1st file: {}'.format(fnames[0]))
        writefile.write(dsDict, out_file=out_file, metadata=atr, ref_file=fnames[0])
    return out_file

def subset_file(fname, subset_dict_input, out_file=None):
    """Subset file with subset_dict
    Inputs:
        fname       : str, path/name of file
        out_file    : str, path/name of output file
        subset_dict : dict, subset parameter, including the following items:
                      subset_x   : list of 2 int,   subset in x direction,   default=None
                      subset_y   : list of 2 int,   subset in y direction,   default=None
                      subset_lat : list of 2 float, subset in lat direction, default=None
                      subset_lon : list of 2 float, subset in lon direction, default=None
                      fill_value : float, optional. filled value for area outside of data coverage. default=None
                                   None/not-existed to subset within data coverage only.
                      tight      : bool, tight subset or not, for lookup table file, i.e. geomap*.trans
    Outputs:
        out_file    : str, path/name of output file;
                      out_file = 'sub_'+fname, if fname is in current directory;
                      out_file = fname, if fname is not in the current directory.
    """
    # Input File Info
    atr = readfile.read_attribute(fname)
    width = int(atr['WIDTH'])
    length = int(atr['LENGTH'])
    k = atr['FILE_TYPE']
    print('subset ' + k + ' file: ' + fname + ' ...')

    subset_dict = subset_dict_input.copy()
    # Read Subset Inputs into 4-tuple box in pixel and geo coord
    pix_box, geo_box = subset_input_dict2box(subset_dict, atr)
    coord = ut.coordinate(atr)

    # if fill_value exists and not None, subset data and fill assigned value for area out of its coverage.
    # otherwise, re-check subset to make sure it's within data coverage and initialize the matrix with np.nan
    if 'fill_value' in subset_dict.keys() and subset_dict['fill_value']:
        outfill = True
    else:
        outfill = False
    if not outfill:
        pix_box = coord.check_box_within_data_coverage(pix_box)
        subset_dict['fill_value'] = np.nan

    geo_box = coord.box_pixel2geo(pix_box)
    data_box = (0, 0, width, length)
    print('data   range in (x0,y0,x1,y1): {}'.format(data_box))
    print('subset range in (x0,y0,x1,y1): {}'.format(pix_box))
    print('data   range in (W, N, E, S): {}'.format(coord.box_pixel2geo(data_box)))
    print('subset range in (W, N, E, S): {}'.format(geo_box))

    if pix_box == data_box:
        print('Subset range == data coverage, no need to subset. Skip.')
        return fname

    # Calculate Subset/Overlap Index
    pix_box4data, pix_box4subset = get_box_overlap_index(data_box, pix_box)

    ########################### Data Read and Write ######################
    # Output File Name
    if not out_file:
        if os.getcwd() == os.path.dirname(os.path.abspath(fname)):
            if 'tight' in subset_dict.keys() and subset_dict['tight']:
                out_file = '{}_tight{}'.format(os.path.splitext(fname)[0],
                                               os.path.splitext(fname)[1])
            else:
                out_file = 'sub_' + os.path.basename(fname)
        else:
            out_file = os.path.basename(fname)
    print('writing >>> ' + out_file)

    # update metadata
    atr = attr.update_attribute4subset(atr, pix_box)

    # subset datasets one by one
    dsNames = readfile.get_dataset_list(fname)
    maxDigit = max([len(i) for i in dsNames])

    ext = os.path.splitext(out_file)[1]
    if ext in ['.h5', '.he5']:
        # initiate the output file
        writefile.layout_hdf5(out_file, metadata=atr, ref_file=fname)

        # subset dataset one-by-one
        for dsName in dsNames:
            with h5py.File(fname, 'r') as fi:
                ds = fi[dsName]
                ds_shape = ds.shape
                ds_ndim = ds.ndim
                print('cropping {d} in {b} from {f} ...'.format(
                    d=dsName, b=pix_box4data, f=os.path.basename(fname)))

                if ds_ndim == 2:
                    # read
                    data = ds[pix_box4data[1]:pix_box4data[3],
                              pix_box4data[0]:pix_box4data[2]]

                    # crop
                    data_out = np.ones((pix_box[3] - pix_box[1],
                                        pix_box[2] - pix_box[0]), data.dtype) * subset_dict['fill_value']
                    data_out[pix_box4subset[1]:pix_box4subset[3],
                             pix_box4subset[0]:pix_box4subset[2]] = data
                    data_out = np.array(data_out, dtype=data.dtype)

                    # write
                    block = [0, int(atr['LENGTH']), 0, int(atr['WIDTH'])]
                    writefile.write_hdf5_block(out_file,
                                               data=data_out,
                                               datasetName=dsName,
                                               block=block,
                                               print_msg=True)

                if ds_ndim == 3:
                    prog_bar = ptime.progressBar(maxValue=ds_shape[0])
                    for i in range(ds_shape[0]):
                        # read
                        data = ds[i,
                                  pix_box4data[1]:pix_box4data[3],
                                  pix_box4data[0]:pix_box4data[2]]

                        # crop
                        data_out = np.ones((1,
                                            pix_box[3] - pix_box[1],
                                            pix_box[2] - pix_box[0]), data.dtype) * subset_dict['fill_value']
                        data_out[:,
                                 pix_box4subset[1]:pix_box4subset[3],
                                 pix_box4subset[0]:pix_box4subset[2]] = data

                        # write
                        block = [i, i + 1,
                                 0, int(atr['LENGTH']),
                                 0, int(atr['WIDTH'])]
                        writefile.write_hdf5_block(out_file,
                                                   data=data_out,
                                                   datasetName=dsName,
                                                   block=block,
                                                   print_msg=False)
                        prog_bar.update(i + 1, suffix='{}/{}'.format(i + 1, ds_shape[0]))
                    prog_bar.close()
        print('finished writing to file: {}'.format(out_file))

    else:
        # IO for binary files
        dsDict = dict()
        for dsName in dsNames:
            dsDict[dsName] = subset_dataset(fname,
                                            dsName,
                                            pix_box,
                                            pix_box4data,
                                            pix_box4subset,
                                            fill_value=subset_dict['fill_value'])
        writefile.write(dsDict, out_file=out_file, metadata=atr, ref_file=fname)

        # write extra metadata files for ISCE data files
        if os.path.isfile(fname + '.xml') or os.path.isfile(fname + '.aux.xml'):
            # write ISCE XML file
            dtype_gdal = readfile.NUMPY2GDAL_DATATYPE[atr['DATA_TYPE']]
            dtype_isce = readfile.GDAL2ISCE_DATATYPE[dtype_gdal]
            writefile.write_isce_xml(out_file,
                                     width=int(atr['WIDTH']),
                                     length=int(atr['LENGTH']),
                                     bands=len(dsDict.keys()),
                                     data_type=dtype_isce,
                                     scheme=atr['scheme'],
                                     image_type=atr['FILE_TYPE'])
            print(f'write file: {out_file}.xml')

            # write GDAL VRT file
            if os.path.isfile(fname + '.vrt'):
                from isceobj.Util.ImageUtil import ImageLib as IML
                img = IML.loadImage(out_file)[0]
                img.renderVRT()
                print(f'write file: {out_file}.vrt')

    return out_file

def multilook_file(infile, lks_y, lks_x, outfile=None, margin=[0, 0, 0, 0]):
    """Multilook input file
    Parameters: infile  - str, path of input file to be multilooked.
                lks_y   - int, number of looks in y / row direction.
                lks_x   - int, number of looks in x / column direction.
                margin  - list of 4 int, number of pixels to be skipped during multilooking.
                          useful for offset product, where the marginal pixels are ignored during
                          cross correlation matching.
                outfile - str, path of output file
    Returns:    outfile - str, path of output file
    """
    lks_y = int(lks_y)
    lks_x = int(lks_x)

    # input file info
    atr = readfile.read_attribute(infile)
    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
    k = atr['FILE_TYPE']
    print('multilooking {} {} file: {}'.format(atr['PROCESSOR'], k, infile))
    print('number of looks in y / azimuth direction: %d' % lks_y)
    print('number of looks in x / range direction: %d' % lks_x)

    # margin --> box
    if margin != [0, 0, 0, 0]:    # top, bottom, left, right
        box = (margin[2], margin[0], width - margin[3], length - margin[1])
        print('number of pixels to skip in top/bottom/left/right boundaries: {}'.format(margin))
    else:
        box = (0, 0, width, length)

    # output file name
    if not outfile:
        if os.getcwd() == os.path.dirname(os.path.abspath(infile)):
            ext = os.path.splitext(infile)[1]
            outfile = os.path.splitext(infile)[0] + '_' + str(lks_y) + 'alks_' + str(lks_x) + 'rlks' + ext
        else:
            outfile = os.path.basename(infile)
    #print('writing >>> '+outfile)

    # read source data and multilooking
    dsNames = readfile.get_dataset_list(infile)
    maxDigit = max([len(i) for i in dsNames])
    dsDict = dict()
    for dsName in dsNames:
        print('multilooking {d:<{w}} from {f} ...'.format(
            d=dsName, w=maxDigit, f=os.path.basename(infile)))
        data = readfile.read(infile, datasetName=dsName, box=box, print_msg=False)[0]

        # keep timeseries data as 3D matrix when there is only one acquisition
        # because readfile.read() will squeeze it to 2D
        if atr['FILE_TYPE'] == 'timeseries' and len(data.shape) == 2:
            data = np.reshape(data, (1, data.shape[0], data.shape[1]))

        data = multilook_data(data, lks_y, lks_x)
        dsDict[dsName] = data

    # update metadata
    atr = multilook_attribute(atr, lks_y, lks_x, box=box)

    # for binary file with 2 bands, always use BIL scheme
    if (len(dsDict.keys()) == 2
            and os.path.splitext(infile)[1] not in ['.h5', '.he5']
            and atr.get('scheme', 'BIL').upper() != 'BIL'):
        print('the input binary file has 2 bands with band interleave as: {}'.format(atr['scheme']))
        print('for the output binary file, change the band interleave to BIL as default.')
        atr['scheme'] = 'BIL'

    writefile.write(dsDict, out_file=outfile, metadata=atr, ref_file=infile)
    return outfile

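# multilook_data() used above is defined elsewhere. For illustration, a minimal
# block-averaging sketch of the idea (an assumption, not necessarily the
# module's actual implementation, which may e.g. use nanmean):
def multilook_data_sketch(data, lks_y, lks_x):
    """Average data in lks_y-by-lks_x blocks over the last two dimensions.
    Works for 2D (length, width) and 3D (num_date, length, width) np.ndarray;
    trailing rows/columns that do not fill a complete block are discarded.
    """
    rows = data.shape[-2] // lks_y * lks_y
    cols = data.shape[-1] // lks_x * lks_x
    data = data[..., :rows, :cols]
    # split each spatial axis into (num_block, block_size), then average the block axes
    shape = data.shape[:-2] + (rows // lks_y, lks_y, cols // lks_x, lks_x)
    return data.reshape(shape).mean(axis=(-3, -1))
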
def diff_file(file1, file2, out_file=None, force=False, max_num_pixel=2e8):
    """Calculate/write file1 - file2
    Parameters: file1         - str, path of file1
                file2         - list of str, path of file2(s)
                out_file      - str, path of output file
                force         - bool, overwrite existing output file
                max_num_pixel - float, maximum number of pixels for each block
    """
    start_time = time.time()

    if not out_file:
        fbase, fext = os.path.splitext(file1)
        if len(file2) > 1:
            raise ValueError('Output file name is needed when file2 has more than one file.')
        out_file = '{}_diff_{}{}'.format(fbase, os.path.splitext(os.path.basename(file2[0]))[0], fext)
    print('{} - {} --> {}'.format(file1, file2, out_file))

    # read basic info
    atr1 = readfile.read_attribute(file1)
    atr2 = readfile.read_attribute(file2[0])
    k1 = atr1['FILE_TYPE']
    k2 = atr2['FILE_TYPE']
    print('the 1st input file is: {}'.format(k1))

    if k1 == 'timeseries':
        if k2 not in ['timeseries', 'giantTimeseries']:
            raise Exception('Input multiple dataset files are not the same file type!')

        dateList1 = timeseries(file1).get_date_list()
        if k2 == 'timeseries':
            dateList2 = timeseries(file2[0]).get_date_list()
            unit_fac = 1.
        elif k2 == 'giantTimeseries':
            dateList2 = giantTimeseries(file2[0]).get_date_list()
            unit_fac = 0.001

        # check reference point
        ref_date, ref_y, ref_x = check_reference(atr1, atr2)

        # check dates shared by two timeseries files
        dateListShared = [i for i in dateList1 if i in dateList2]
        dateShared = np.ones((len(dateList1)), dtype=np.bool_)
        if dateListShared != dateList1:
            print('WARNING: {} does not contain all dates in {}'.format(file2, file1))
            if force:
                dateListEx = list(set(dateList1) - set(dateListShared))
                print('Continue and enforce the differencing for their shared dates only.')
                print('\twith the following dates ignored for differencing:\n{}'.format(dateListEx))
                dateShared[np.array([dateList1.index(i) for i in dateListEx])] = 0
            else:
                raise Exception('To enforce the differencing anyway, use --force option.')

        # instantiate the output file
        writefile.layout_hdf5(out_file, ref_file=file1)

        # block-by-block IO
        length, width = int(atr1['LENGTH']), int(atr1['WIDTH'])
        num_box = int(np.ceil(len(dateList1) * length * width / max_num_pixel))
        box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length),
                                               num_split=num_box,
                                               dimension='y',
                                               print_msg=True)

        # use "is not None" so that a reference pixel at row/column 0 is not ignored
        if ref_y is not None and ref_x is not None:
            ref_box = (ref_x, ref_y, ref_x + 1, ref_y + 1)
            ref_val = readfile.read(file2[0], datasetName=dateListShared, box=ref_box)[0] * unit_fac

        for i, box in enumerate(box_list):
            if num_box > 1:
                print('\n------- processing patch {} out of {} --------------'.format(i+1, num_box))
                print('box: {}'.format(box))

            # read data2 (consider different reference_date/pixel)
            print('read from file: {}'.format(file2[0]))
            data2 = readfile.read(file2[0], datasetName=dateListShared, box=box)[0] * unit_fac

            if ref_y is not None and ref_x is not None:
                print('* referencing data from {} to y/x: {}/{}'.format(os.path.basename(file2[0]), ref_y, ref_x))
                data2 -= np.tile(ref_val.reshape(-1, 1, 1), (1, data2.shape[1], data2.shape[2]))

            if ref_date:
                print('* referencing data from {} to date: {}'.format(os.path.basename(file2[0]), ref_date))
                ref_ind = dateListShared.index(ref_date)
                data2 -= np.tile(data2[ref_ind, :, :], (data2.shape[0], 1, 1))

            # read data1
            print('read from file: {}'.format(file1))
            data = readfile.read(file1, box=box)[0]

            # apply differencing
            mask = data == 0.
            data[dateShared] -= data2
            data[mask] = 0.            # do not change zero phase value
            del data2

            # write the block
            block = [0, data.shape[0], box[1], box[3], box[0], box[2]]
            writefile.write_hdf5_block(out_file, data=data, datasetName=k1, block=block)

    elif all(i == 'ifgramStack' for i in [k1, k2]):
        obj1 = ifgramStack(file1)
        obj1.open()
        obj2 = ifgramStack(file2[0])
        obj2.open()
        ds_names = list(set(obj1.datasetNames) & set(obj2.datasetNames))
        if len(ds_names) == 0:
            raise ValueError('no common dataset between two files!')
        ds_name = [i for i in ifgramDatasetNames if i in ds_names][0]

        # read data
        print('reading {} from file {} ...'.format(ds_name, file1))
        data1 = readfile.read(file1, datasetName=ds_name)[0]
        print('reading {} from file {} ...'.format(ds_name, file2[0]))
        data2 = readfile.read(file2[0], datasetName=ds_name)[0]

        # consider reference pixel
        if 'unwrapphase' in ds_name.lower():
            print('referencing to pixel ({},{}) ...'.format(obj1.refY, obj1.refX))
            ref1 = data1[:, obj1.refY, obj1.refX]
            ref2 = data2[:, obj2.refY, obj2.refX]
            for i in range(data1.shape[0]):
                data1[i, :][data1[i, :] != 0.] -= ref1[i]
                data2[i, :][data2[i, :] != 0.] -= ref2[i]

        # operation and ignore zero values
        data1[data1 == 0] = np.nan
        data2[data2 == 0] = np.nan
        data = data1 - data2
        del data1, data2
        data[np.isnan(data)] = 0.

        # write to file
        dsDict = {}
        dsDict[ds_name] = data
        writefile.write(dsDict, out_file=out_file, ref_file=file1)

    else:
        # get common dataset list
        ds_names_list = [readfile.get_dataset_list(x) for x in [file1] + file2]
        ds_names = list(set.intersection(*map(set, ds_names_list)))
        # if all files have one dataset, ignore dataset name variation and take the 1st one as reference
        if all(len(x) == 1 for x in ds_names_list):
            ds_names = ds_names_list[0]
        print('List of common datasets across files: ', ds_names)
        if len(ds_names) < 1:
            raise ValueError('No common datasets found among files:\n{}'.format([file1] + file2))

        # loop over each dataset
        dsDict = {}
        for ds_name in ds_names:
            print('differencing {} ...'.format(ds_name))
            data = readfile.read(file1, datasetName=ds_name)[0]
            dtype = data.dtype
            for i, fname in enumerate(file2):
                # ignore ds_name if input file has single dataset
                ds_name2read = None if len(ds_names_list[i+1]) == 1 else ds_name
                # read
                data2 = readfile.read(fname, datasetName=ds_name2read)[0]
                # convert to float32 to apply the operation because some types, e.g. bool, do not support it,
                # then convert back to the original data type
                data = np.array(data, dtype=np.float32) - np.array(data2, dtype=np.float32)
            # save data in the same type as the 1st file
            dsDict[ds_name] = np.array(data, dtype=dtype)

        # output
        print('use metadata from the 1st file: {}'.format(file1))
        writefile.write(dsDict, out_file=out_file, metadata=atr1, ref_file=file1)

    m, s = divmod(time.time()-start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs'.format(m, s))
    return out_file
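# Usage sketch for diff_file(); the file names are hypothetical. Note that the
# second argument is a list of paths, matching the signature above, and the
# --force semantics are exposed via the force keyword.
def example_diff_timeseries():
    """Subtract a hypothetical tropospheric delay time series from a
    displacement time series, writing timeseries_ERA5.h5."""
    return diff_file('timeseries.h5', ['inputs/ERA5.h5'],
                     out_file='timeseries_ERA5.h5', force=False)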
def multilook_file(infile, lks_y, lks_x, outfile=None, method='average',
                   margin=[0, 0, 0, 0], max_memory=4):
    """ Multilook input file
    Parameters: infile     - str, path of input file to be multilooked.
                lks_y      - int, number of looks in y / row direction.
                lks_x      - int, number of looks in x / column direction.
                method     - str, multilook method: average or nearest.
                margin     - list of 4 int, number of pixels to be skipped during multilooking.
                             useful for offset product, where the marginal pixels are ignored during
                             cross correlation matching.
                max_memory - float, maximum used memory in GB for block-by-block IO.
                outfile    - str, path of output file
    Returns:    outfile    - str, path of output file
    """
    lks_y = int(lks_y)
    lks_x = int(lks_x)

    # input file info
    atr = readfile.read_attribute(infile)
    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
    k = atr['FILE_TYPE']
    print('multilooking {} {} file: {}'.format(atr['PROCESSOR'], k, infile))
    print('number of looks in y / azimuth direction: %d' % lks_y)
    print('number of looks in x / range direction: %d' % lks_x)
    print('multilook method: {}'.format(method))

    # margin --> box
    # note: use "!=" (value comparison) instead of "is not" (identity comparison),
    # which is always True for a fresh list literal
    if margin != [0, 0, 0, 0]:   # top, bottom, left, right
        box = (margin[2], margin[0], width - margin[3], length - margin[1])
        print('number of pixels to skip in top/bottom/left/right boundaries: {}'.format(margin))
    else:
        box = (0, 0, width, length)

    # output file name
    ext = os.path.splitext(infile)[1]
    if not outfile:
        if os.getcwd() == os.path.dirname(os.path.abspath(infile)):
            outfile = os.path.splitext(infile)[0] + '_' + str(lks_y) + 'alks_' + str(lks_x) + 'rlks' + ext
        else:
            outfile = os.path.basename(infile)

    # update metadata
    atr = attr.update_attribute4multilook(atr, lks_y, lks_x, box=box)
    if ext in ['.h5', '.he5']:
        writefile.layout_hdf5(outfile, metadata=atr, ref_file=infile)

    # read source data and multilooking
    dsNames = readfile.get_dataset_list(infile)
    maxDigit = max([len(i) for i in dsNames])
    dsDict = dict()
    for dsName in dsNames:
        print('multilooking {d:<{w}} from {f} ...'.format(
            d=dsName, w=maxDigit, f=os.path.basename(infile)))

        # split in Y/row direction for IO for HDF5 only
        if ext in ['.h5', '.he5']:
            # calc step size with memory usage up to max_memory GB
            with h5py.File(infile, 'r') as f:
                ds = f[dsName]
                ds_size = np.prod(ds.shape) * 4
            num_step = int(np.ceil(ds_size * 4 / (max_memory * 1024**3)))
            row_step = int(np.rint(length / num_step / 10) * 10)
            row_step = max(row_step, 10)
        else:
            row_step = box[3] - box[1]

        num_step = int(np.ceil((box[3] - box[1]) / (row_step * lks_y)))
        for i in range(num_step):
            r0 = box[1] + row_step * lks_y * i
            r1 = box[1] + row_step * lks_y * (i + 1)
            r1 = min(r1, box[3])
            # IO box
            box_i = (box[0], r0, box[2], r1)
            box_o = (int((box[0] - box[0]) / lks_x),
                     int((r0 - box[1]) / lks_y),
                     int((box[2] - box[0]) / lks_x),
                     int((r1 - box[1]) / lks_y))
            print('box: {}'.format(box_o))

            # read / multilook
            if method == 'nearest':
                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=box_i,
                                     xstep=lks_x,
                                     ystep=lks_y,
                                     print_msg=False)[0]
            else:
                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=box_i,
                                     print_msg=False)[0]
                data = multilook_data(data, lks_y, lks_x)

            # output block
            if data.ndim == 3:
                block = [0, data.shape[0], box_o[1], box_o[3], box_o[0], box_o[2]]
            else:
                block = [box_o[1], box_o[3], box_o[0], box_o[2]]

            # write
            if ext in ['.h5', '.he5']:
                writefile.write_hdf5_block(outfile,
                                           data=data,
                                           datasetName=dsName,
                                           block=block,
                                           print_msg=False)
            else:
                dsDict[dsName] = data

    # for binary file with 2 bands, always use BIL scheme
    if (len(dsDict.keys()) == 2
            and os.path.splitext(infile)[1] not in ['.h5', '.he5']
            and atr.get('scheme', 'BIL').upper() != 'BIL'):
        print('the input binary file has 2 bands with band interleave as: {}'.format(atr['scheme']))
        print('for the output binary file, change the band interleave to BIL as default.')
        atr['scheme'] = 'BIL'

    if ext not in ['.h5', '.he5']:
        writefile.write(dsDict, out_file=outfile, metadata=atr, ref_file=infile)

        # write extra metadata files for ISCE data files
        if os.path.isfile(infile + '.xml') or os.path.isfile(infile + '.aux.xml'):
            # write ISCE XML file
            dtype_gdal = readfile.NUMPY2GDAL_DATATYPE[atr['DATA_TYPE']]
            dtype_isce = readfile.GDAL2ISCE_DATATYPE[dtype_gdal]
            writefile.write_isce_xml(outfile,
                                     width=int(atr['WIDTH']),
                                     length=int(atr['LENGTH']),
                                     bands=len(dsDict.keys()),
                                     data_type=dtype_isce,
                                     scheme=atr['scheme'],
                                     image_type=atr['FILE_TYPE'])
            print(f'write file: {outfile}.xml')

            # write GDAL VRT file
            if os.path.isfile(infile + '.vrt'):
                from isceobj.Util.ImageUtil import ImageLib as IML
                img = IML.loadImage(outfile)[0]
                img.renderVRT()
                print(f'write file: {outfile}.vrt')

    return outfile
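# Usage sketch for the memory-aware multilook_file() variant above; the file
# name and max_memory value are hypothetical. HDF5 inputs are processed in row
# blocks sized to stay within roughly max_memory GB.
def example_multilook_low_memory():
    return multilook_file('timeseries.h5', lks_y=5, lks_x=5,
                          method='average', max_memory=2)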
def subset_file(fname, subset_dict_input, out_file=None):
    """Subset file with the given subset dictionary.
    Inputs:
        fname       : str, path/name of file
        out_file    : str, path/name of output file
        subset_dict : dict, subset parameter, including the following items:
                      subset_x   : list of 2 int,   subset in x direction,   default=None
                      subset_y   : list of 2 int,   subset in y direction,   default=None
                      subset_lat : list of 2 float, subset in lat direction, default=None
                      subset_lon : list of 2 float, subset in lon direction, default=None
                      fill_value : float, optional, value used to fill the area outside of the
                                   data coverage; default=None, i.e. subset within data coverage only.
                      tight      : bool, tight subset or not, for lookup table file, i.e. geomap*.trans
    Outputs:
        out_file    : str, path/name of output file;
                      out_file = 'subset_'+fname, if fname is in the current directory;
                      out_file = fname, if fname is not in the current directory.
    """
    # Input File Info
    try:
        atr = readfile.read_attribute(fname)
    except Exception:
        return None

    width = int(atr['WIDTH'])
    length = int(atr['LENGTH'])
    k = atr['FILE_TYPE']
    print('subset ' + k + ' file: ' + fname + ' ...')
    subset_dict = subset_dict_input.copy()

    # Read Subset Inputs into 4-tuple box in pixel and geo coord
    pix_box, geo_box = subset_input_dict2box(subset_dict, atr)
    coord = ut.coordinate(atr)

    # if fill_value exists and is not None, subset data and fill the assigned value
    # for the area outside of its coverage;
    # otherwise, re-check the subset to make sure it is within the data coverage,
    # and initialize the matrix with np.nan
    outfill = False
    if 'fill_value' in subset_dict.keys() and subset_dict['fill_value']:
        outfill = True
    if not outfill:
        pix_box = coord.check_box_within_data_coverage(pix_box)
        subset_dict['fill_value'] = np.nan

    geo_box = coord.box_pixel2geo(pix_box)
    data_box = (0, 0, width, length)
    print('data range in y/x: ' + str(data_box))
    print('subset range in y/x: ' + str(pix_box))
    print('data range in lat/lon: ' + str(coord.box_pixel2geo(data_box)))
    print('subset range in lat/lon: ' + str(geo_box))

    if pix_box == data_box:
        print('Subset range == data coverage, no need to subset. Skip.')
        return fname

    # Calculate Subset/Overlap Index
    pix_box4data, pix_box4subset = get_box_overlap_index(data_box, pix_box)

    ########################### Data Read and Write ######################
    # Output File Name
    if not out_file:
        if os.getcwd() == os.path.dirname(os.path.abspath(fname)):
            if 'tight' in subset_dict.keys() and subset_dict['tight']:
                out_file = '{}_tight{}'.format(os.path.splitext(fname)[0],
                                               os.path.splitext(fname)[1])
            else:
                out_file = 'subset_' + os.path.basename(fname)
        else:
            out_file = os.path.basename(fname)
    print('writing >>> ' + out_file)

    # subset datasets one by one
    dsNames = readfile.get_dataset_list(fname)
    maxDigit = max([len(i) for i in dsNames])
    dsDict = dict()
    for dsName in dsNames:
        print('subsetting {d:<{w}} from {f} ...'.format(
            d=dsName, w=maxDigit, f=os.path.basename(fname)))
        data = readfile.read(fname, datasetName=dsName, print_msg=False)[0]

        # subset 2D data
        if len(data.shape) == 2:
            data_overlap = data[pix_box4data[1]:pix_box4data[3],
                                pix_box4data[0]:pix_box4data[2]]
            data = np.ones((pix_box[3] - pix_box[1],
                            pix_box[2] - pix_box[0]),
                           data.dtype) * subset_dict['fill_value']
            data[pix_box4subset[1]:pix_box4subset[3],
                 pix_box4subset[0]:pix_box4subset[2]] = data_overlap

        # subset 3D data
        elif len(data.shape) == 3:
            data_overlap = data[:,
                                pix_box4data[1]:pix_box4data[3],
                                pix_box4data[0]:pix_box4data[2]]
            data = np.ones((data.shape[0],
                            pix_box[3] - pix_box[1],
                            pix_box[2] - pix_box[0]),
                           data.dtype) * subset_dict['fill_value']
            data[:,
                 pix_box4subset[1]:pix_box4subset[3],
                 pix_box4subset[0]:pix_box4subset[2]] = data_overlap

        dsDict[dsName] = data

    atr = ut.subset_attribute(atr, pix_box)
    writefile.write(dsDict, out_file=out_file, metadata=atr, ref_file=fname)
    return out_file
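# Usage sketch for subset_file(); the file name and pixel ranges are
# hypothetical. Without a fill_value, the requested box is clipped to the
# data coverage before reading.
def example_subset_by_pixel():
    subset_dict = {'subset_x': [100, 500],
                   'subset_y': [200, 800]}
    return subset_file('velocity.h5', subset_dict)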
def stitch_files(fnames, out_file, apply_offset=True, disp_fig=True, no_data_value=None):
    """Stitch all input files into one."""
    fext = os.path.splitext(fnames[0])[1]
    atr = readfile.read_attribute(fnames[0])

    # grab ds_names
    ds_names = [None]
    if fext in ['.h5', '.he5']:
        # get the common dataset list among all input files
        ds_names = set(readfile.get_dataset_list(fnames[0]))
        for fname in fnames[1:]:
            ds_names.intersection_update(readfile.get_dataset_list(fname))
        ds_names = sorted(list(ds_names))

        # special treatment for velocity/time_function files
        if atr['FILE_TYPE'] == 'velocity' and len(ds_names) > 1:
            ds_names = ['velocity']

    print('files to be stitched: {}'.format(fnames))
    print('datasets to be stitched: {}'.format(ds_names))

    # stitching
    dsDict = {}
    for ds_name in ds_names:
        # reading
        mat, atr = readfile.read(fnames[0], datasetName=ds_name)
        ds_name_out = ds_name if ds_name else atr['FILE_TYPE']
        print('#' * 50)
        print(f'read {ds_name_out} from file: {fnames[0]}')

        # masking
        if no_data_value is not None:
            print('convert no_data_value from {} to NaN'.format(no_data_value))
            mat[mat == no_data_value] = np.nan

        # skip pixels with zero incidenceAngle for geometry files
        if atr['FILE_TYPE'] == 'geometry' and 'incidenceAngle' in ds_names:
            print('ignore pixels with ZERO incidenceAngle')
            inc_angle = readfile.read(fnames[0], datasetName='incidenceAngle')[0]
            mat[inc_angle == 0] = np.nan

        for i, fname in enumerate(fnames[1:]):
            print('-' * 30)
            print('read data from file: {}'.format(fname))
            # reading
            mat2, atr2 = readfile.read(fname, datasetName=ds_name)

            # masking
            if no_data_value is not None:
                mat2[mat2 == no_data_value] = np.nan

            # skip pixels with zero incidenceAngle for geometry files
            if atr['FILE_TYPE'] == 'geometry' and 'incidenceAngle' in ds_names:
                print('ignore pixels with ZERO incidenceAngle')
                inc_angle2 = readfile.read(fname, datasetName='incidenceAngle')[0]
                mat2[inc_angle2 == 0] = np.nan

            print('stitching ...')
            (mat, atr, mat11, mat22, mat_diff) = stitch_two_matrices(mat, atr, mat2, atr2,
                                                                     apply_offset=apply_offset)

            # plot
            if apply_offset:
                print('plot stitching & shifting result ...')
                suffix = '{}{}'.format(i, i + 1)
                out_fig = '{}_{}.png'.format(os.path.splitext(out_file)[0], suffix)
                plot_stitch(mat11, mat22, mat, mat_diff, out_fig=out_fig)

        dsDict[ds_name_out] = mat

    # write output file
    print('#' * 50)
    writefile.write(dsDict, out_file=out_file, metadata=atr)

    # plot
    if disp_fig:
        print('showing ...')
        plt.show()
    else:
        plt.close()

    return out_file
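# Usage sketch for stitch_files(); the two input files are hypothetical
# velocity products from adjacent tracks. With apply_offset=True, a *.png
# figure of the stitching/shifting result is saved per file pair.
def example_stitch_velocity():
    return stitch_files(['velocity_track1.h5', 'velocity_track2.h5'],
                        out_file='velocity_stitched.h5',
                        apply_offset=True, disp_fig=False)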
def run_load_data(self, step_name):
    """Load InSAR stacks into HDF5 files in the ./inputs folder.
    It 1) copies auxiliary files into the working directory (for Univ of Miami only)
       2) loads all interferogram stack files into the mintpy/inputs directory
       3) checks the loading result
       4) adds custom metadata (optional, for HDF-EOS5 format only)
       5) raises an error if the loading is incomplete
    """
    # 1) copy aux files (optional)
    self._copy_aux_file()

    # 2) loading data
    scp_args = '--template {}'.format(self.templateFile)
    if self.customTemplateFile:
        scp_args += ' {}'.format(self.customTemplateFile)
    if self.projectName:
        scp_args += ' --project {}'.format(self.projectName)

    # run
    print("load_data.py", scp_args)
    mintpy.load_data.main(scp_args.split())
    os.chdir(self.workDir)

    # 3) check loading result
    load_complete, stack_file, geom_file = ut.check_loaded_dataset(self.workDir, print_msg=True)[0:3]

    # 3.1) output waterMask.h5
    water_mask_file = 'waterMask.h5'
    if 'waterMask' in readfile.get_dataset_list(geom_file):
        print('generate {} from {} for convenience'.format(water_mask_file, geom_file))
        if ut.run_or_skip(out_file=water_mask_file, in_file=geom_file) == 'run':
            water_mask, atr = readfile.read(geom_file, datasetName='waterMask')
            atr['FILE_TYPE'] = 'waterMask'
            writefile.write(water_mask, out_file=water_mask_file, metadata=atr)

    # 4) add custom metadata (optional)
    if self.customTemplateFile:
        print('updating {}, {} metadata based on custom template file: {}'.format(
            os.path.basename(stack_file),
            os.path.basename(geom_file),
            os.path.basename(self.customTemplateFile)))
        # use ut.add_attribute() instead of add_attribute.py because of
        # better control of special metadata, such as SUBSET_X/YMIN
        ut.add_attribute(stack_file, self.customTemplate)
        ut.add_attribute(geom_file, self.customTemplate)

    # 5) if not load_complete, plot and raise exception
    if not load_complete:
        # plot result if error occurred
        self.plot_result(print_aux=False, plot=plot)

        # go back to original directory
        print('Go back to directory:', self.cwd)
        os.chdir(self.cwd)

        # raise error
        msg = 'step {}: NOT all required dataset found, exit.'.format(step_name)
        raise RuntimeError(msg)
    return
def filter_file(fname, ds_names=None, filter_type='lowpass_gaussian', filter_par=None, fname_out=None):
    """Filter 2D matrix with selected filter
    Inputs:
        fname       : string, name/path of file to be filtered
        ds_names    : list of string, datasets of interest
        filter_type : string, filter type
        filter_par  : (list of) int/float, optional, parameter for low/high pass filter
                      for low/highpass_avg, it's kernel size in int
                      for low/highpass_gaussian, it's sigma in float
                      for double_difference, it's local and regional kernel sizes in int
    Output:
        fname_out   : string, optional, output file name/path
    """
    # Info
    filter_type = filter_type.lower()
    atr = readfile.read_attribute(fname)
    k = atr['FILE_TYPE']
    msg = 'filtering {} file: {} using {} filter'.format(k, fname, filter_type)

    if filter_type.endswith('avg'):
        if not filter_par:
            filter_par = 5
        elif isinstance(filter_par, list):
            filter_par = filter_par[0]
        filter_par = int(filter_par)
        msg += ' with kernel size of {}'.format(filter_par)

    elif filter_type.endswith('gaussian'):
        if not filter_par:
            filter_par = 3.0
        elif isinstance(filter_par, list):
            filter_par = filter_par[0]
        filter_par = float(filter_par)
        msg += ' with sigma of {:.1f}'.format(filter_par)

    elif filter_type == 'double_difference':
        if not filter_par:
            filter_par = [1, 10]
        local, regional = int(filter_par[0]), int(filter_par[1])
        msg += ' with local/regional kernel sizes of {}/{}'.format(local, regional)
    print(msg)

    # output filename
    if not fname_out:
        fname_out = '{}_{}{}'.format(os.path.splitext(fname)[0], filter_type,
                                     os.path.splitext(fname)[1])

    # filtering file
    ds_all = readfile.get_dataset_list(fname)
    if not ds_names:
        ds_names = ds_all
    ds_skips = list(set(ds_all) - set(ds_names))

    maxDigit = max([len(i) for i in ds_names])
    dsDict = dict()

    for ds_name in ds_skips:
        dsDict[ds_name] = readfile.read(fname, datasetName=ds_name, print_msg=False)[0]

    for ds_name in ds_names:
        msg = 'filtering {d:<{w}} from {f} '.format(d=ds_name, w=maxDigit,
                                                    f=os.path.basename(fname))
        # read
        data = readfile.read(fname, datasetName=ds_name, print_msg=False)[0]

        # filter
        if len(data.shape) == 3:
            num_loop = data.shape[0]
            for i in range(num_loop):
                data[i, :, :] = filter_data(data[i, :, :], filter_type, filter_par)
                sys.stdout.write('\r{} {}/{} ...'.format(msg, i + 1, num_loop))
                sys.stdout.flush()
            print('')
        else:
            data = filter_data(data, filter_type, filter_par)

        # write
        dsDict[ds_name] = data

    writefile.write(dsDict, out_file=fname_out, metadata=atr, ref_file=fname)
    return fname_out
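# Usage sketch for filter_file(); the file name is hypothetical. A float
# filter_par is interpreted as the Gaussian sigma for *_gaussian filters.
def example_filter_lowpass():
    return filter_file('velocity.h5', filter_type='lowpass_gaussian',
                       filter_par=3.0)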
def run_geocode(inps):
    """geocode all input files"""
    start_time = time.time()

    # Prepare geometry for geocoding
    res_obj = resample(lookupFile=inps.lookupFile,
                       dataFile=inps.file[0],
                       SNWE=inps.SNWE,
                       laloStep=inps.laloStep,
                       processor=inps.processor)
    res_obj.open()

    # resample input files one by one
    for infile in inps.file:
        print('-' * 50 + '\nresampling file: {}'.format(infile))
        ext = os.path.splitext(infile)[1]
        atr = readfile.read_attribute(infile, datasetName=inps.dset)
        outfile = auto_output_filename(infile, inps)
        if inps.updateMode and ut.run_or_skip(outfile, in_file=[infile, inps.lookupFile]) == 'skip':
            print('update mode is ON, skip geocoding.')
            continue

        # read source data and resample
        dsNames = readfile.get_dataset_list(infile, datasetName=inps.dset)
        maxDigit = max([len(i) for i in dsNames])
        dsResDict = dict()
        for dsName in dsNames:
            print('reading {d:<{w}} from {f} ...'.format(d=dsName, w=maxDigit,
                                                         f=os.path.basename(infile)))
            if ext in ['.h5', '.he5']:
                data = readfile.read(infile, datasetName=dsName, print_msg=False)[0]
            else:
                data, atr = readfile.read(infile, datasetName=dsName, print_msg=False)

            # keep timeseries data as 3D matrix when there is only one acquisition
            # because readfile.read() will squeeze it to 2D
            if atr['FILE_TYPE'] == 'timeseries' and len(data.shape) == 2:
                data = np.reshape(data, (1, data.shape[0], data.shape[1]))

            res_data = res_obj.run_resample(src_data=data,
                                            interp_method=inps.interpMethod,
                                            fill_value=inps.fillValue,
                                            nprocs=inps.nprocs,
                                            print_msg=True)
            dsResDict[dsName] = res_data

        # update metadata
        if inps.radar2geo:
            atr = metadata_radar2geo(atr, res_obj)
        else:
            atr = metadata_geo2radar(atr, res_obj)
        #if len(dsNames) == 1 and dsName not in ['timeseries']:
        #    atr['FILE_TYPE'] = dsNames[0]
        #    infile = None

        writefile.write(dsResDict, out_file=outfile, metadata=atr, ref_file=infile)

    m, s = divmod(time.time() - start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs.\n'.format(m, s))
    return outfile
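# Usage sketch for run_geocode(); every attribute value below is hypothetical
# and simply mirrors the attributes accessed in the function body (plus
# whatever auto_output_filename() expects, which is not shown here).
def example_run_geocode():
    from argparse import Namespace
    inps = Namespace(file=['velocity.h5'],
                     dset=None,
                     lookupFile='inputs/geometryRadar.h5',
                     SNWE=None,
                     laloStep=None,
                     processor='isce',
                     updateMode=False,
                     radar2geo=True,
                     interpMethod='nearest',
                     fillValue=np.nan,
                     nprocs=1)
    return run_geocode(inps)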
def multilook_file(infile, lks_y, lks_x, outfile=None, method='average', margin=[0, 0, 0, 0]):
    """ Multilook input file
    Parameters: infile  - str, path of input file to be multilooked.
                lks_y   - int, number of looks in y / row direction.
                lks_x   - int, number of looks in x / column direction.
                method  - str, multilook method: average or nearest.
                margin  - list of 4 int, number of pixels to be skipped during multilooking.
                          useful for offset product, where the marginal pixels are ignored during
                          cross correlation matching.
                outfile - str, path of output file
    Returns:    outfile - str, path of output file
    """
    lks_y = int(lks_y)
    lks_x = int(lks_x)

    # input file info
    atr = readfile.read_attribute(infile)
    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
    k = atr['FILE_TYPE']
    print('multilooking {} {} file: {}'.format(atr['PROCESSOR'], k, infile))
    print('number of looks in y / azimuth direction: %d' % lks_y)
    print('number of looks in x / range direction: %d' % lks_x)
    print('multilook method: {}'.format(method))

    # margin --> box
    # note: use "!=" (value comparison) instead of "is not" (identity comparison),
    # which is always True for a fresh list literal
    if margin != [0, 0, 0, 0]:   # top, bottom, left, right
        box = (margin[2], margin[0], width - margin[3], length - margin[1])
        print('number of pixels to skip in top/bottom/left/right boundaries: {}'.format(margin))
    else:
        box = (0, 0, width, length)

    # output file name
    ext = os.path.splitext(infile)[1]
    if not outfile:
        if os.getcwd() == os.path.dirname(os.path.abspath(infile)):
            outfile = os.path.splitext(infile)[0] + '_' + str(lks_y) + 'alks_' + str(lks_x) + 'rlks' + ext
        else:
            outfile = os.path.basename(infile)

    # update metadata
    atr = multilook_attribute(atr, lks_y, lks_x, box=box)
    if ext in ['.h5', '.he5']:
        writefile.layout_hdf5(outfile, metadata=atr, ref_file=infile)

    # read source data and multilooking
    dsNames = readfile.get_dataset_list(infile)
    maxDigit = max([len(i) for i in dsNames])
    dsDict = dict()
    for dsName in dsNames:
        print('multilooking {d:<{w}} from {f} ...'.format(
            d=dsName, w=maxDigit, f=os.path.basename(infile)))

        # split in Y/row direction for IO for HDF5 only
        if ext in ['.h5', '.he5']:
            row_step = 200
        else:
            row_step = box[3] - box[1]

        num_step = int(np.ceil((box[3] - box[1]) / (row_step * lks_y)))
        for i in range(num_step):
            r0 = box[1] + row_step * lks_y * i
            r1 = box[1] + row_step * lks_y * (i + 1)
            r1 = min(r1, box[3])
            # IO box
            box_i = (box[0], r0, box[2], r1)
            box_o = (int((box[0] - box[0]) / lks_x),
                     int((r0 - box[1]) / lks_y),
                     int((box[2] - box[0]) / lks_x),
                     int((r1 - box[1]) / lks_y))
            print('box: {}'.format(box_o))

            # read / multilook
            if method == 'nearest':
                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=box_i,
                                     xstep=lks_x,
                                     ystep=lks_y,
                                     print_msg=False)[0]

                # fix the size discrepancy between average / nearest method
                out_len = box_o[3] - box_o[1]
                out_wid = box_o[2] - box_o[0]
                if data.ndim == 3:
                    data = data[:, :out_len, :out_wid]
                else:
                    data = data[:out_len, :out_wid]
            else:
                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=box_i,
                                     print_msg=False)[0]

                # keep timeseries data as 3D matrix when there is only one acquisition
                # because readfile.read() will squeeze it to 2D
                if atr['FILE_TYPE'] == 'timeseries' and len(data.shape) == 2:
                    data = np.reshape(data, (1, data.shape[0], data.shape[1]))

                data = multilook_data(data, lks_y, lks_x)

            # output block
            if data.ndim == 3:
                block = [0, data.shape[0], box_o[1], box_o[3], box_o[0], box_o[2]]
            else:
                block = [box_o[1], box_o[3], box_o[0], box_o[2]]

            # write
            if ext in ['.h5', '.he5']:
                writefile.write_hdf5_block(outfile,
                                           data=data,
                                           datasetName=dsName,
                                           block=block,
                                           print_msg=False)
            else:
                dsDict[dsName] = data

    # for binary file with 2 bands, always use BIL scheme
    if (len(dsDict.keys()) == 2
            and os.path.splitext(infile)[1] not in ['.h5', '.he5']
            and atr.get('scheme', 'BIL').upper() != 'BIL'):
        print('the input binary file has 2 bands with band interleave as: {}'.format(atr['scheme']))
        print('for the output binary file, change the band interleave to BIL as default.')
        atr['scheme'] = 'BIL'

    if ext not in ['.h5', '.he5']:
        writefile.write(dsDict, out_file=outfile, metadata=atr, ref_file=infile)

    return outfile
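# Usage sketch for the block-IO multilook_file() variant above; the file name
# is hypothetical. method='nearest' decimates via strided reading instead of
# averaging, which suits integer-valued datasets such as connected components.
def example_multilook_nearest():
    return multilook_file('inputs/ifgramStack.h5', lks_y=3, lks_x=3,
                          method='nearest')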