def read_subset_box(template_file, meta):
    """Resolve the subset bounding box requested in a template file.

    Parameters: template_file - str, path of template file; may be None/empty
                meta          - dict, metadata; must hold LENGTH and WIDTH
    Returns:    pix_box       - tuple of 4 int in (x0, y0, x1, y1)
                meta          - dict, metadata; passed through ut.subset_attribute()
                                when a subset box was found, otherwise returned as-is
    """
    pix_box = None

    if template_file and os.path.isfile(template_file):
        # the template may give the box in pixel and/or geo coordinates
        pix_box, geo_box = read_subset_template2box(template_file)

        # a geo box takes over: convert it to pixel coordinates and
        # run it through the data coverage check
        if geo_box is not None:
            converter = ut.coordinate(meta)
            pix_box = converter.bbox_geo2radar(geo_box)
            pix_box = converter.check_box_within_data_coverage(pix_box)
            print('input bounding box in lalo: {}'.format(geo_box))

    # no subset requested: fall back to the full data extent
    if pix_box is None:
        length = int(meta['LENGTH'])
        width = int(meta['WIDTH'])
        return (0, 0, width, length), meta

    # subset requested: update the metadata against the new bounding box
    print('input bounding box in yx: {}'.format(pix_box))
    meta = ut.subset_attribute(meta, pix_box)
    return pix_box, meta
def write2hdf5(self, outputFile='ifgramStack.h5', access_mode='w', box=None, compression=None, extra_metadata=None):
    '''Save/write an ifgramStackDict object into an HDF5 file with the structure below:

    /                  Root level
    Attributes         Dictionary for metadata
    /date              2D array of string  in size of (m, 2   ) in YYYYMMDD format for master and slave date
    /bperp             1D array of float32 in size of (m,     ) in meter.
    /dropIfgram        1D array of bool    in size of (m,     ) True by default for keeping interferogram.
    /unwrapPhase       3D array of float32 in size of (m, l, w) in radian.
    /coherence         3D array of float32 in size of (m, l, w).
    /connectComponent  3D array of int16   in size of (m, l, w).           (optional)
    /wrapPhase         3D array of float32 in size of (m, l, w) in radian. (optional)
    /iono              3D array of float32 in size of (m, l, w) in radian. (optional)
    /rangeOffset       3D array of float32 in size of (m, l, w).           (optional)
    /azimuthOffset     3D array of float32 in size of (m, l, w).           (optional)

    Parameters: outputFile     : str, Name of the HDF5 file for the InSAR stack
                access_mode    : str, access mode of output File, e.g. w, r+
                box            : tuple, subset range in (x0, y0, x1, y1)
                compression    : str or None, HDF5 compression filter for the 3D datasets;
                                 connectComponent is always written with 'lzf'
                extra_metadata : dict, extra metadata to be added into output file
    Returns:    outputFile
    '''
    self.outputFile = outputFile

    # Use a context manager so the HDF5 handle is released even when reading
    # one of the input files or writing a dataset raises part-way through.
    with h5py.File(self.outputFile, access_mode) as f:
        print('create HDF5 file {} with {} mode'.format(self.outputFile, access_mode))

        self.pairs = sorted(self.pairsDict.keys())
        # keep only the known dataset names, in the order of ifgramDatasetNames
        self.dsNames = list(self.pairsDict[self.pairs[0]].datasetDict.keys())
        self.dsNames = [i for i in ifgramDatasetNames if i in self.dsNames]
        maxDigit = max([len(i) for i in self.dsNames])
        # NOTE(review): get_size() is expected to populate self.numIfgram,
        # self.length and self.width, which are read right below - confirm.
        self.get_size(box)

        self.bperp = np.zeros(self.numIfgram)

        ###############################
        # 3D datasets containing unwrapPhase, coherence, connectComponent, wrapPhase, etc.
        for dsName in self.dsNames:
            dsShape = (self.numIfgram, self.length, self.width)
            dsDataType = dataType
            dsCompression = compression
            if dsName in ['connectComponent']:
                dsDataType = np.int16
                dsCompression = 'lzf'

            print(('create dataset /{d:<{w}} of {t:<25} in size of {s}'
                   ' with compression = {c}').format(d=dsName,
                                                     w=maxDigit,
                                                     t=str(dsDataType),
                                                     s=dsShape,
                                                     c=dsCompression))
            ds = f.create_dataset(dsName,
                                  shape=dsShape,
                                  maxshape=(None, dsShape[1], dsShape[2]),
                                  dtype=dsDataType,
                                  chunks=True,
                                  compression=dsCompression)

            prog_bar = ptime.progressBar(maxValue=self.numIfgram)
            for i in range(self.numIfgram):
                ifgramObj = self.pairsDict[self.pairs[i]]
                data = ifgramObj.read(dsName, box=box)[0]
                ds[i, :, :] = data
                self.bperp[i] = ifgramObj.get_perp_baseline()
                prog_bar.update(i+1, suffix='{}_{}'.format(self.pairs[i][0],
                                                           self.pairs[i][1]))
            prog_bar.close()
            ds.attrs['MODIFICATION_TIME'] = str(time.time())

        ###############################
        # 2D dataset containing master and slave dates of all pairs
        dsName = 'date'
        # np.bytes_ is the same type as the np.string_ alias removed in NumPy 2.0
        dsDataType = np.bytes_
        dsShape = (self.numIfgram, 2)
        print('create dataset /{d:<{w}} of {t:<25} in size of {s}'.format(d=dsName,
                                                                          w=maxDigit,
                                                                          t=str(dsDataType),
                                                                          s=dsShape))
        data = np.array(self.pairs, dtype=dsDataType)
        f.create_dataset(dsName, data=data)

        ###############################
        # 1D dataset containing perpendicular baseline of all pairs
        dsName = 'bperp'
        dsDataType = dataType
        dsShape = (self.numIfgram,)
        print('create dataset /{d:<{w}} of {t:<25} in size of {s}'.format(d=dsName,
                                                                          w=maxDigit,
                                                                          t=str(dsDataType),
                                                                          s=dsShape))
        data = np.array(self.bperp, dtype=dsDataType)
        f.create_dataset(dsName, data=data)

        ###############################
        # 1D dataset containing bool value of dropping the interferograms or not
        dsName = 'dropIfgram'
        dsDataType = np.bool_
        dsShape = (self.numIfgram,)
        print('create dataset /{d:<{w}} of {t:<25} in size of {s}'.format(d=dsName,
                                                                          w=maxDigit,
                                                                          t=str(dsDataType),
                                                                          s=dsShape))
        data = np.ones(dsShape, dtype=dsDataType)
        f.create_dataset(dsName, data=data)

        ###############################
        # Attributes
        self.get_metadata()
        if extra_metadata:
            self.metadata.update(extra_metadata)
            print('add extra metadata: {}'.format(extra_metadata))
        self.metadata = ut.subset_attribute(self.metadata, box)
        self.metadata['FILE_TYPE'] = self.name
        for key, value in self.metadata.items():
            f.attrs[key] = value

    print('Finished writing to {}'.format(self.outputFile))
    return self.outputFile
def write2hdf5(self, outputFile='geometryRadar.h5', access_mode='w', box=None, compression='lzf', extra_metadata=None):
    '''Write the geometry datasets of this object into an HDF5 file with the structure below:

    /                        Root level
    Attributes               Dictionary for metadata. 'X/Y_FIRST/STEP' attribute for geocoded.
    /height                  2D array of float32 in size of (l, w   ) in meter.
    /latitude (azimuthCoord) 2D array of float32 in size of (l, w   ) in degree.
    /longitude (rangeCoord)  2D array of float32 in size of (l, w   ) in degree.
    /incidenceAngle          2D array of float32 in size of (l, w   ) in degree.
    /slantRangeDistance      2D array of float32 in size of (l, w   ) in meter.
    /azimuthAngle            2D array of float32 in size of (l, w   ) in degree. (optional)
    /shadowMask              2D array of bool    in size of (l, w   ).           (optional)
    /waterMask               2D array of bool    in size of (l, w   ).           (optional)
    /bperp                   3D array of float32 in size of (n, l, w) in meter   (optional)
    /date                    1D array of string  in size of (n,     ) in YYYYMMDD(optional)
    ...

    Parameters: outputFile     : str, name of the output HDF5 file
                access_mode    : str, access mode of output file, e.g. w, r+
                box            : tuple, subset range in (x0, y0, x1, y1)
                compression    : str or None, HDF5 compression filter for the datasets
                extra_metadata : dict, extra metadata to be added into output file
    Returns:    outputFile, or None if the object holds no dataset file path
    '''
    if len(self.datasetDict) == 0:
        print('No dataset file path in the object, skip HDF5 file writing.')
        return None

    self.outputFile = outputFile

    # Use a context manager so the HDF5 handle is released even when reading
    # one of the input files or writing a dataset raises part-way through.
    with h5py.File(self.outputFile, access_mode) as f:
        print('create HDF5 file {} with {} mode'.format(self.outputFile, access_mode))

        maxDigit = max([len(i) for i in geometryDatasetNames])
        # length/width: size of the (subset) output;
        # self.length/self.width: size returned by get_size() without a box
        length, width = self.get_size(box=box)
        self.length, self.width = self.get_size()

        ###############################
        for dsName in self.dsNames:
            # 3D datasets containing bperp
            if dsName == 'bperp':
                self.dateList = list(self.datasetDict[dsName].keys())
                dsDataType = dataType
                self.numDate = len(self.dateList)
                dsShape = (self.numDate, length, width)
                ds = f.create_dataset(dsName,
                                      shape=dsShape,
                                      maxshape=(None, dsShape[1], dsShape[2]),
                                      dtype=dsDataType,
                                      chunks=True,
                                      compression=compression)
                print(('create dataset /{d:<{w}} of {t:<25} in size of {s}'
                       ' with compression = {c}').format(d=dsName,
                                                         w=maxDigit,
                                                         t=str(dsDataType),
                                                         s=dsShape,
                                                         c=str(compression)))

                print('read coarse grid baseline files and linear interpolate into full resolution ...')
                prog_bar = ptime.progressBar(maxValue=self.numDate)
                for i in range(self.numDate):
                    fname = self.datasetDict[dsName][self.dateList[i]]
                    data = read_isce_bperp_file(fname=fname,
                                                out_shape=(self.length, self.width),
                                                box=box)
                    ds[i, :, :] = data
                    prog_bar.update(i+1, suffix=self.dateList[i])
                prog_bar.close()

                # Write 1D dataset date
                # (separate local names so the loop variable dsName is not clobbered)
                dateDsName = 'date'
                dateShape = (self.numDate,)
                # np.bytes_ is the same type as the np.string_ alias removed in NumPy 2.0
                dateDataType = np.bytes_
                print(('create dataset /{d:<{w}} of {t:<25}'
                       ' in size of {s}').format(d=dateDsName,
                                                 w=maxDigit,
                                                 t=str(dateDataType),
                                                 s=dateShape))
                data = np.array(self.dateList, dtype=dateDataType)
                f.create_dataset(dateDsName, data=data)

            # 2D datasets containing height, latitude, incidenceAngle, shadowMask, etc.
            else:
                dsDataType = dataType
                if dsName.lower().endswith('mask'):
                    dsDataType = np.bool_
                dsShape = (length, width)
                print(('create dataset /{d:<{w}} of {t:<25} in size of {s}'
                       ' with compression = {c}').format(d=dsName,
                                                         w=maxDigit,
                                                         t=str(dsDataType),
                                                         s=dsShape,
                                                         c=str(compression)))
                data = np.array(self.read(family=dsName, box=box)[0], dtype=dsDataType)
                f.create_dataset(dsName,
                                 data=data,
                                 chunks=True,
                                 compression=compression)

        ###############################
        # Generate Dataset if not existed in binary file: incidenceAngle, slantRangeDistance
        for dsName in [i for i in ['incidenceAngle', 'slantRangeDistance']
                       if i not in self.dsNames]:
            # Calculate data
            data = None
            if dsName == 'incidenceAngle':
                data = self.get_incidence_angle(box=box)
            elif dsName == 'slantRangeDistance':
                data = self.get_slant_range_distance(box=box)

            # Write dataset; skipped when the getter returns None
            if data is not None:
                dsShape = data.shape
                dsDataType = dataType
                print(('create dataset /{d:<{w}} of {t:<25} in size of {s}'
                       ' with compression = {c}').format(d=dsName,
                                                         w=maxDigit,
                                                         t=str(dsDataType),
                                                         s=dsShape,
                                                         c=str(compression)))
                f.create_dataset(dsName,
                                 data=data,
                                 dtype=dataType,
                                 chunks=True,
                                 compression=compression)

        ###############################
        # Attributes
        self.get_metadata()
        if extra_metadata:
            self.metadata.update(extra_metadata)
            print('add extra metadata: {}'.format(extra_metadata))
        self.metadata = ut.subset_attribute(self.metadata, box)
        self.metadata['FILE_TYPE'] = self.name
        for key, value in self.metadata.items():
            f.attrs[key] = value

    print('Finished writing to {}'.format(self.outputFile))
    return self.outputFile
def subset_file(fname, subset_dict_input, out_file=None):
    """Subset file with
    Inputs:
        fname       : str, path/name of file
        out_file    : str, path/name of output file
        subset_dict_input : dict, subset parameter, including the following items:
            subset_x   : list of 2 int,   subset in x direction,   default=None
            subset_y   : list of 2 int,   subset in y direction,   default=None
            subset_lat : list of 2 float, subset in lat direction, default=None
            subset_lon : list of 2 float, subset in lon direction, default=None
            fill_value : float, optional. filled value for area outside of data coverage. default=None
                         None/not-existed to subset within data coverage only.
            tight      : bool, tight subset or not, for lookup table file, i.e. geomap*.trans
    Outputs:
        out_file :  str, path/name of output file;
                    out_file = 'subset_'+fname, if fname is in current directory;
                    out_file = fname, if fname is not in the current directory.
                    The input fname is returned if the subset equals the data coverage;
                    None is returned if the file attributes cannot be read.
    """

    # Input File Info
    # Best-effort: an unreadable file is skipped, but KeyboardInterrupt /
    # SystemExit are no longer swallowed and the reason is reported.
    try:
        atr = readfile.read_attribute(fname)
    except Exception as err:
        print('WARNING: failed to read attributes from {}: {}. Skip.'.format(fname, err))
        return None

    width = int(atr['WIDTH'])
    length = int(atr['LENGTH'])
    k = atr['FILE_TYPE']
    print('subset ' + k + ' file: ' + fname + ' ...')

    # work on a copy so that the caller's dict is not modified
    subset_dict = subset_dict_input.copy()

    # Read Subset Inputs into 4-tuple box in pixel and geo coord
    pix_box, geo_box = subset_input_dict2box(subset_dict, atr)

    coord = ut.coordinate(atr)
    # if fill_value exists and not None, subset data and fill assigned value for area out of its coverage.
    # otherwise, re-check subset to make sure it's within data coverage and initialize the matrix with np.nan
    # Compare against None instead of truthiness: a fill value of 0 is a valid request.
    outfill = subset_dict.get('fill_value') is not None
    if not outfill:
        pix_box = coord.check_box_within_data_coverage(pix_box)
        subset_dict['fill_value'] = np.nan

    geo_box = coord.box_pixel2geo(pix_box)
    data_box = (0, 0, width, length)
    print('data   range in y/x: ' + str(data_box))
    print('subset range in y/x: ' + str(pix_box))
    print('data   range in lat/lon: ' + str(coord.box_pixel2geo(data_box)))
    print('subset range in lat/lon: ' + str(geo_box))

    if pix_box == data_box:
        print('Subset range == data coverage, no need to subset. Skip.')
        return fname

    # Calculate Subset/Overlap Index
    pix_box4data, pix_box4subset = get_box_overlap_index(data_box, pix_box)

    ###########################  Data Read and Write  ######################
    # Output File Name
    if not out_file:
        if os.getcwd() == os.path.dirname(os.path.abspath(fname)):
            if 'tight' in subset_dict.keys() and subset_dict['tight']:
                out_file = '{}_tight{}'.format(os.path.splitext(fname)[0],
                                               os.path.splitext(fname)[1])
            else:
                out_file = 'subset_' + os.path.basename(fname)
        else:
            out_file = os.path.basename(fname)
    print('writing >>> ' + out_file)

    # size of the output box in (rows, cols)
    out_length = pix_box[3] - pix_box[1]
    out_width = pix_box[2] - pix_box[0]

    # subset datasets one by one
    dsNames = readfile.get_dataset_list(fname)
    maxDigit = max([len(i) for i in dsNames])
    dsDict = dict()
    for dsName in dsNames:
        print('subsetting {d:<{w}} from {f} ...'.format(
            d=dsName, w=maxDigit, f=os.path.basename(fname)))
        data = readfile.read(fname, datasetName=dsName, print_msg=False)[0]

        # subset 2D data: start from a matrix of fill_value, then paste the overlap
        if data.ndim == 2:
            data_overlap = data[pix_box4data[1]:pix_box4data[3],
                                pix_box4data[0]:pix_box4data[2]]
            data = np.ones((out_length, out_width),
                           data.dtype) * subset_dict['fill_value']
            data[pix_box4subset[1]:pix_box4subset[3],
                 pix_box4subset[0]:pix_box4subset[2]] = data_overlap

        # subset 3D data: same as 2D, keeping the first axis untouched
        elif data.ndim == 3:
            data_overlap = data[:,
                                pix_box4data[1]:pix_box4data[3],
                                pix_box4data[0]:pix_box4data[2]]
            data = np.ones((data.shape[0], out_length, out_width),
                           data.dtype) * subset_dict['fill_value']
            data[:,
                 pix_box4subset[1]:pix_box4subset[3],
                 pix_box4subset[0]:pix_box4subset[2]] = data_overlap

        dsDict[dsName] = data

    # update metadata against the subset box and write everything at once
    atr = ut.subset_attribute(atr, pix_box)
    writefile.write(dsDict, out_file=out_file, metadata=atr, ref_file=fname)
    return out_file
def subset_file(fname, subset_dict_input, out_file=None):
    """Subset file with
    Inputs:
        fname       : str, path/name of file
        out_file    : str, path/name of output file
        subset_dict_input : dict, subset parameter, including the following items:
            subset_x   : list of 2 int,   subset in x direction,   default=None
            subset_y   : list of 2 int,   subset in y direction,   default=None
            subset_lat : list of 2 float, subset in lat direction, default=None
            subset_lon : list of 2 float, subset in lon direction, default=None
            fill_value : float, optional. filled value for area outside of data coverage. default=None
                         None/not-existed to subset within data coverage only.
            tight      : bool, tight subset or not, for lookup table file, i.e. geomap*.trans
    Outputs:
        out_file :  str, path/name of output file;
                    out_file = 'subset_'+fname, if fname is in current directory;
                    out_file = fname, if fname is not in the current directory.
                    The input fname is returned if the subset equals the data coverage;
                    None is returned if the file attributes cannot be read.
    """

    # Input File Info
    # Best-effort: an unreadable file is skipped, but KeyboardInterrupt /
    # SystemExit are no longer swallowed and the reason is reported.
    try:
        atr = readfile.read_attribute(fname)
    except Exception as err:
        print('WARNING: failed to read attributes from {}: {}. Skip.'.format(fname, err))
        return None

    width = int(atr['WIDTH'])
    length = int(atr['LENGTH'])
    k = atr['FILE_TYPE']
    print('subset '+k+' file: '+fname+' ...')

    # work on a copy so that the caller's dict is not modified
    subset_dict = subset_dict_input.copy()

    # Read Subset Inputs into 4-tuple box in pixel and geo coord
    pix_box, geo_box = subset_input_dict2box(subset_dict, atr)

    coord = ut.coordinate(atr)
    # if fill_value exists and not None, subset data and fill assigned value for area out of its coverage.
    # otherwise, re-check subset to make sure it's within data coverage and initialize the matrix with np.nan
    # Compare against None instead of truthiness: a fill value of 0 is a valid request.
    outfill = subset_dict.get('fill_value') is not None
    if not outfill:
        pix_box = coord.check_box_within_data_coverage(pix_box)
        subset_dict['fill_value'] = np.nan

    geo_box = coord.box_pixel2geo(pix_box)
    data_box = (0, 0, width, length)
    print('data   range in y/x: '+str(data_box))
    print('subset range in y/x: '+str(pix_box))
    print('data   range in lat/lon: '+str(coord.box_pixel2geo(data_box)))
    print('subset range in lat/lon: '+str(geo_box))

    if pix_box == data_box:
        print('Subset range == data coverage, no need to subset. Skip.')
        return fname

    # Calculate Subset/Overlap Index
    pix_box4data, pix_box4subset = get_box_overlap_index(data_box, pix_box)

    ###########################  Data Read and Write  ######################
    # Output File Name
    if not out_file:
        if os.getcwd() == os.path.dirname(os.path.abspath(fname)):
            if 'tight' in subset_dict.keys() and subset_dict['tight']:
                out_file = '{}_tight{}'.format(os.path.splitext(fname)[0],
                                               os.path.splitext(fname)[1])
            else:
                out_file = 'subset_'+os.path.basename(fname)
        else:
            out_file = os.path.basename(fname)
    print('writing >>> '+out_file)

    # size of the output box in (rows, cols)
    out_length = pix_box[3] - pix_box[1]
    out_width = pix_box[2] - pix_box[0]

    # subset datasets one by one
    dsNames = readfile.get_dataset_list(fname)
    maxDigit = max([len(i) for i in dsNames])
    dsDict = dict()
    for dsName in dsNames:
        print('subsetting {d:<{w}} from {f} ...'.format(
            d=dsName, w=maxDigit, f=os.path.basename(fname)))
        data = readfile.read(fname, datasetName=dsName, print_msg=False)[0]

        # subset 2D data: start from a matrix of fill_value, then paste the overlap
        if data.ndim == 2:
            data_overlap = data[pix_box4data[1]:pix_box4data[3],
                                pix_box4data[0]:pix_box4data[2]]
            data = np.ones((out_length, out_width),
                           data.dtype) * subset_dict['fill_value']
            data[pix_box4subset[1]:pix_box4subset[3],
                 pix_box4subset[0]:pix_box4subset[2]] = data_overlap

        # subset 3D data: same as 2D, keeping the first axis untouched
        elif data.ndim == 3:
            data_overlap = data[:,
                                pix_box4data[1]:pix_box4data[3],
                                pix_box4data[0]:pix_box4data[2]]
            data = np.ones((data.shape[0], out_length, out_width),
                           data.dtype) * subset_dict['fill_value']
            data[:,
                 pix_box4subset[1]:pix_box4subset[3],
                 pix_box4subset[0]:pix_box4subset[2]] = data_overlap

        dsDict[dsName] = data

    # update metadata against the subset box and write everything at once
    atr = ut.subset_attribute(atr, pix_box)
    writefile.write(dsDict, out_file=out_file, metadata=atr, ref_file=fname)
    return out_file
def write2hdf5(self, outputFile='ifgramStack.h5', access_mode='w', box=None, compression=None, extra_metadata=None):
    '''Save/write an ifgramStackDict object into an HDF5 file with the structure below:

    /                  Root level
    Attributes         Dictionary for metadata
    /date              2D array of string  in size of (m, 2   ) in YYYYMMDD format for master and slave date
    /bperp             1D array of float32 in size of (m,     ) in meter.
    /dropIfgram        1D array of bool    in size of (m,     ).
    /unwrapPhase       3D array of float32 in size of (m, l, w) in radian.
    /coherence         3D array of float32 in size of (m, l, w).
    /connectComponent  3D array of int16   in size of (m, l, w).           (optional)
    /wrapPhase         3D array of float32 in size of (m, l, w) in radian. (optional)
    /iono              3D array of float32 in size of (m, l, w) in radian. (optional)
    /rangeOffset       3D array of float32 in size of (m, l, w).           (optional)
    /azimuthOffset     3D array of float32 in size of (m, l, w).           (optional)

    Parameters: outputFile     : str, Name of the HDF5 file for the InSAR stack
                access_mode    : str, access mode of output File, e.g. w, r+
                box            : tuple, subset range in (x0, y0, x1, y1)
                compression    : str or None, HDF5 compression filter for the 3D datasets
                extra_metadata : dict, extra metadata to be added into output file
    Returns:    outputFile
    '''
    self.outputFile = outputFile

    # Use a context manager so the HDF5 handle is released even when reading
    # one of the input files or writing a dataset raises part-way through.
    with h5py.File(self.outputFile, access_mode) as f:
        print('create HDF5 file {} with {} mode'.format(self.outputFile, access_mode))

        self.pairs = sorted(self.pairsDict.keys())
        # keep only the known dataset names, in the order of ifgramDatasetNames
        self.dsNames = list(self.pairsDict[self.pairs[0]].datasetDict.keys())
        self.dsNames = [i for i in ifgramDatasetNames if i in self.dsNames]
        maxDigit = max([len(i) for i in self.dsNames])
        # NOTE(review): get_size() is expected to populate self.numIfgram,
        # self.length and self.width, which are read right below - confirm.
        self.get_size(box)

        self.bperp = np.zeros(self.numIfgram)

        ###############################
        # 3D datasets containing unwrapPhase, coherence, connectComponent, wrapPhase, etc.
        for dsName in self.dsNames:
            dsShape = (self.numIfgram, self.length, self.width)
            dsDataType = dataType
            if dsName in ['connectComponent']:
                # int16 as documented above: a bool dataset would collapse
                # every non-zero component label to True.
                dsDataType = np.int16

            print(('create dataset /{d:<{w}} of {t:<25} in size of {s}'
                   ' with compression = {c}').format(d=dsName,
                                                     w=maxDigit,
                                                     t=str(dsDataType),
                                                     s=dsShape,
                                                     c=str(compression)))
            ds = f.create_dataset(dsName,
                                  shape=dsShape,
                                  maxshape=(None, dsShape[1], dsShape[2]),
                                  dtype=dsDataType,
                                  chunks=True,
                                  compression=compression)

            prog_bar = ptime.progressBar(maxValue=self.numIfgram)
            for i in range(self.numIfgram):
                ifgramObj = self.pairsDict[self.pairs[i]]
                data = ifgramObj.read(dsName, box=box)[0]
                ds[i, :, :] = data
                self.bperp[i] = ifgramObj.get_perp_baseline()
                prog_bar.update(i+1, suffix='{}_{}'.format(self.pairs[i][0],
                                                           self.pairs[i][1]))
            prog_bar.close()
            ds.attrs['MODIFICATION_TIME'] = str(time.time())

        ###############################
        # 2D dataset containing master and slave dates of all pairs
        dsName = 'date'
        # np.bytes_ is the same type as the np.string_ alias removed in NumPy 2.0
        dsDataType = np.bytes_
        dsShape = (self.numIfgram, 2)
        print('create dataset /{d:<{w}} of {t:<25} in size of {s}'.format(d=dsName,
                                                                          w=maxDigit,
                                                                          t=str(dsDataType),
                                                                          s=dsShape))
        data = np.array(self.pairs, dtype=dsDataType)
        f.create_dataset(dsName, data=data)

        ###############################
        # 1D dataset containing perpendicular baseline of all pairs
        dsName = 'bperp'
        dsDataType = dataType
        dsShape = (self.numIfgram,)
        print('create dataset /{d:<{w}} of {t:<25} in size of {s}'.format(d=dsName,
                                                                          w=maxDigit,
                                                                          t=str(dsDataType),
                                                                          s=dsShape))
        data = np.array(self.bperp, dtype=dsDataType)
        f.create_dataset(dsName, data=data)

        ###############################
        # 1D dataset containing bool value of dropping the interferograms or not
        dsName = 'dropIfgram'
        dsDataType = np.bool_
        dsShape = (self.numIfgram,)
        print('create dataset /{d:<{w}} of {t:<25} in size of {s}'.format(d=dsName,
                                                                          w=maxDigit,
                                                                          t=str(dsDataType),
                                                                          s=dsShape))
        data = np.ones(dsShape, dtype=dsDataType)
        f.create_dataset(dsName, data=data)

        ###############################
        # Attributes
        self.get_metadata()
        if extra_metadata:
            self.metadata.update(extra_metadata)
            print('add extra metadata: {}'.format(extra_metadata))
        self.metadata = ut.subset_attribute(self.metadata, box)
        self.metadata['FILE_TYPE'] = self.name
        for key, value in self.metadata.items():
            f.attrs[key] = value

    print('Finished writing to {}'.format(self.outputFile))
    return self.outputFile
def write2hdf5(self, outputFile='geometryRadar.h5', access_mode='w', box=None, compression='gzip', extra_metadata=None):
    '''Write the geometry datasets of this object into an HDF5 file with the structure below:

    /                        Root level
    Attributes               Dictionary for metadata. 'X/Y_FIRST/STEP' attribute for geocoded.
    /height                  2D array of float32 in size of (l, w   ) in meter.
    /latitude (azimuthCoord) 2D array of float32 in size of (l, w   ) in degree.
    /longitude (rangeCoord)  2D array of float32 in size of (l, w   ) in degree.
    /incidenceAngle          2D array of float32 in size of (l, w   ) in degree.
    /slantRangeDistance      2D array of float32 in size of (l, w   ) in meter.
    /azimuthAngle            2D array of float32 in size of (l, w   ) in degree. (optional)
    /shadowMask              2D array of bool    in size of (l, w   ).           (optional)
    /waterMask               2D array of bool    in size of (l, w   ).           (optional)
    /bperp                   3D array of float32 in size of (n, l, w) in meter   (optional)
    /date                    1D array of string  in size of (n,     ) in YYYYMMDD(optional)
    ...

    Parameters: outputFile     : str, name of the output HDF5 file
                access_mode    : str, access mode of output file, e.g. w, r+
                box            : tuple, subset range in (x0, y0, x1, y1)
                compression    : str or None, HDF5 compression filter for the datasets
                extra_metadata : dict, extra metadata to be added into output file
    Returns:    outputFile, or None if the object holds no dataset file path
    '''
    if len(self.datasetDict) == 0:
        print('No dataset file path in the object, skip HDF5 file writing.')
        return None

    self.outputFile = outputFile

    # Use a context manager so the HDF5 handle is released even when reading
    # one of the input files or writing a dataset raises part-way through.
    with h5py.File(self.outputFile, access_mode) as f:
        print('create HDF5 file {} with {} mode'.format(self.outputFile, access_mode))

        maxDigit = max([len(i) for i in geometryDatasetNames])
        # length/width: size of the (subset) output;
        # self.length/self.width: size returned by get_size() without a box
        length, width = self.get_size(box=box)
        self.length, self.width = self.get_size()

        ###############################
        for dsName in self.dsNames:
            # 3D datasets containing bperp
            if dsName == 'bperp':
                self.dateList = list(self.datasetDict[dsName].keys())
                dsDataType = dataType
                self.numDate = len(self.dateList)
                dsShape = (self.numDate, length, width)
                ds = f.create_dataset(dsName,
                                      shape=dsShape,
                                      maxshape=(None, dsShape[1], dsShape[2]),
                                      dtype=dsDataType,
                                      chunks=True,
                                      compression=compression)
                print(('create dataset /{d:<{w}} of {t:<25} in size of {s}'
                       ' with compression = {c}').format(d=dsName,
                                                         w=maxDigit,
                                                         t=str(dsDataType),
                                                         s=dsShape,
                                                         c=str(compression)))

                print('read coarse grid baseline files and linear interpolate into full resolution ...')
                prog_bar = ptime.progressBar(maxValue=self.numDate)
                for i in range(self.numDate):
                    fname = self.datasetDict[dsName][self.dateList[i]]
                    data = read_isce_bperp_file(fname=fname,
                                                out_shape=(self.length, self.width),
                                                box=box)
                    ds[i, :, :] = data
                    prog_bar.update(i+1, suffix=self.dateList[i])
                prog_bar.close()

                # Write 1D dataset date
                # (separate local names so the loop variable dsName is not clobbered)
                dateDsName = 'date'
                dateShape = (self.numDate,)
                # np.bytes_ is the same type as the np.string_ alias removed in NumPy 2.0
                dateDataType = np.bytes_
                print(('create dataset /{d:<{w}} of {t:<25}'
                       ' in size of {s}').format(d=dateDsName,
                                                 w=maxDigit,
                                                 t=str(dateDataType),
                                                 s=dateShape))
                data = np.array(self.dateList, dtype=dateDataType)
                f.create_dataset(dateDsName, data=data)

            # 2D datasets containing height, latitude, incidenceAngle, shadowMask, etc.
            else:
                dsDataType = dataType
                if dsName.lower().endswith('mask'):
                    dsDataType = np.bool_
                dsShape = (length, width)
                print(('create dataset /{d:<{w}} of {t:<25} in size of {s}'
                       ' with compression = {c}').format(d=dsName,
                                                         w=maxDigit,
                                                         t=str(dsDataType),
                                                         s=dsShape,
                                                         c=str(compression)))
                data = np.array(self.read(family=dsName, box=box)[0], dtype=dsDataType)
                f.create_dataset(dsName,
                                 data=data,
                                 chunks=True,
                                 compression=compression)

        ###############################
        # Generate Dataset if not existed in binary file: incidenceAngle, slantRangeDistance
        for dsName in [i for i in ['incidenceAngle', 'slantRangeDistance']
                       if i not in self.dsNames]:
            # Calculate data
            data = None
            if dsName == 'incidenceAngle':
                data = self.get_incidence_angle(box=box)
            elif dsName == 'slantRangeDistance':
                data = self.get_slant_range_distance(box=box)

            # Write dataset; skipped when the getter returns None
            if data is not None:
                dsShape = data.shape
                dsDataType = dataType
                print(('create dataset /{d:<{w}} of {t:<25} in size of {s}'
                       ' with compression = {c}').format(d=dsName,
                                                         w=maxDigit,
                                                         t=str(dsDataType),
                                                         s=dsShape,
                                                         c=str(compression)))
                f.create_dataset(dsName,
                                 data=data,
                                 dtype=dataType,
                                 chunks=True,
                                 compression=compression)

        ###############################
        # Attributes
        self.get_metadata()
        if extra_metadata:
            self.metadata.update(extra_metadata)
            print('add extra metadata: {}'.format(extra_metadata))
        self.metadata = ut.subset_attribute(self.metadata, box)
        self.metadata['FILE_TYPE'] = self.name
        for key, value in self.metadata.items():
            f.attrs[key] = value

    print('Finished writing to {}'.format(self.outputFile))
    return self.outputFile
def subset_file(fname, subset_dict_input, out_file=None):
    """Subset file with
    Inputs:
        fname       : str, path/name of file
        out_file    : str, path/name of output file
        subset_dict_input : dict, subset parameter, including the following items:
            subset_x   : list of 2 int,   subset in x direction,   default=None
            subset_y   : list of 2 int,   subset in y direction,   default=None
            subset_lat : list of 2 float, subset in lat direction, default=None
            subset_lon : list of 2 float, subset in lon direction, default=None
            fill_value : float, optional. filled value for area outside of data coverage. default=None
                         None/not-existed to subset within data coverage only.
            tight      : bool, tight subset or not, for lookup table file, i.e. geomap*.trans
    Outputs:
        out_file :  str, path/name of output file;
                    out_file = 'subset_'+fname, if fname is in current directory;
                    out_file = fname, if fname is not in the current directory.
                    The input fname is returned if the subset equals the data coverage.
    """

    # Input File Info
    atr = readfile.read_attribute(fname)
    width = int(atr['WIDTH'])
    length = int(atr['LENGTH'])
    k = atr['FILE_TYPE']
    print('subset ' + k + ' file: ' + fname + ' ...')

    # work on a copy so that the caller's dict is not modified
    subset_dict = subset_dict_input.copy()

    # Read Subset Inputs into 4-tuple box in pixel and geo coord
    pix_box, geo_box = subset_input_dict2box(subset_dict, atr)

    coord = ut.coordinate(atr)
    # if fill_value exists and not None, subset data and fill assigned value for area out of its coverage.
    # otherwise, re-check subset to make sure it's within data coverage and initialize the matrix with np.nan
    # Compare against None instead of truthiness: a fill value of 0 is a valid request.
    outfill = subset_dict.get('fill_value') is not None
    if not outfill:
        pix_box = coord.check_box_within_data_coverage(pix_box)
        subset_dict['fill_value'] = np.nan

    geo_box = coord.box_pixel2geo(pix_box)
    data_box = (0, 0, width, length)
    print('data   range in y/x: ' + str(data_box))
    print('subset range in y/x: ' + str(pix_box))
    print('data   range in lat/lon: ' + str(coord.box_pixel2geo(data_box)))
    print('subset range in lat/lon: ' + str(geo_box))

    if pix_box == data_box:
        print('Subset range == data coverage, no need to subset. Skip.')
        return fname

    # Calculate Subset/Overlap Index
    pix_box4data, pix_box4subset = get_box_overlap_index(data_box, pix_box)

    ###########################  Data Read and Write  ######################
    # Output File Name
    if not out_file:
        if os.getcwd() == os.path.dirname(os.path.abspath(fname)):
            if 'tight' in subset_dict.keys() and subset_dict['tight']:
                out_file = '{}_tight{}'.format(os.path.splitext(fname)[0],
                                               os.path.splitext(fname)[1])
            else:
                out_file = 'subset_' + os.path.basename(fname)
        else:
            out_file = os.path.basename(fname)
    print('writing >>> ' + out_file)

    # update metadata
    atr = ut.subset_attribute(atr, pix_box)

    # subset datasets one by one
    dsNames = readfile.get_dataset_list(fname)

    ext = os.path.splitext(out_file)[1]
    if ext in ['.h5', '.he5']:
        # initiate the output file
        writefile.layout_hdf5(out_file, metadata=atr, ref_file=fname)

        # output size, taken from the metadata updated for the subset box
        out_length = int(atr['LENGTH'])
        out_width = int(atr['WIDTH'])
        fill_value = subset_dict['fill_value']

        # subset dataset one-by-one, so that only one 2D slice is held in memory
        for dsName in dsNames:
            with h5py.File(fname, 'r') as fi:
                ds = fi[dsName]
                ds_shape = ds.shape
                ds_ndim = ds.ndim
                print('cropping {d} in {b} from {f} ...'.format(
                    d=dsName, b=pix_box4data, f=os.path.basename(fname)))

                if ds_ndim == 2:
                    # read
                    data = ds[pix_box4data[1]:pix_box4data[3],
                              pix_box4data[0]:pix_box4data[2]]

                    # crop: start from a matrix of fill_value, then paste the overlap
                    data_out = np.ones((pix_box[3] - pix_box[1],
                                        pix_box[2] - pix_box[0]),
                                       data.dtype) * fill_value
                    data_out[pix_box4subset[1]:pix_box4subset[3],
                             pix_box4subset[0]:pix_box4subset[2]] = data
                    # multiplying by a float fill value may have promoted the dtype
                    data_out = np.array(data_out, dtype=data.dtype)

                    # write
                    block = [0, out_length, 0, out_width]
                    writefile.write_hdf5_block(out_file,
                                               data=data_out,
                                               datasetName=dsName,
                                               block=block,
                                               print_msg=True)

                elif ds_ndim == 3:
                    prog_bar = ptime.progressBar(maxValue=ds_shape[0])
                    for i in range(ds_shape[0]):
                        # read
                        data = ds[i,
                                  pix_box4data[1]:pix_box4data[3],
                                  pix_box4data[0]:pix_box4data[2]]

                        # crop
                        data_out = np.ones((1,
                                            pix_box[3] - pix_box[1],
                                            pix_box[2] - pix_box[0]),
                                           data.dtype) * fill_value
                        data_out[:,
                                 pix_box4subset[1]:pix_box4subset[3],
                                 pix_box4subset[0]:pix_box4subset[2]] = data

                        # write
                        block = [i, i + 1, 0, out_length, 0, out_width]
                        writefile.write_hdf5_block(out_file,
                                                   data=data_out,
                                                   datasetName=dsName,
                                                   block=block,
                                                   print_msg=False)

                        prog_bar.update(i + 1, suffix='{}/{}'.format(i + 1, ds_shape[0]))
                    prog_bar.close()

        # report the file that was written, not the input file
        print('finished writing to file: {}'.format(out_file))

    else:
        # IO for binary files
        dsDict = dict()
        for dsName in dsNames:
            dsDict[dsName] = subset_dataset(fname,
                                            dsName,
                                            pix_box,
                                            pix_box4data,
                                            pix_box4subset,
                                            fill_value=subset_dict['fill_value'])
        writefile.write(dsDict, out_file=out_file, metadata=atr, ref_file=fname)

    return out_file