def run_geocode(inps):
    """Geocode all input files."""
    start_time = time.time()

    # feed the largest file for resample object initiation
    ind_max = np.argmax([os.path.getsize(i) for i in inps.file])

    # prepare geometry for geocoding
    kwargs = dict(interp_method=inps.interpMethod,
                  fill_value=inps.fillValue,
                  nprocs=inps.nprocs,
                  max_memory=inps.maxMemory,
                  software=inps.software,
                  print_msg=True)
    if inps.latFile and inps.lonFile:
        kwargs['lat_file'] = inps.latFile
        kwargs['lon_file'] = inps.lonFile
    res_obj = resample(lut_file=inps.lookupFile,
                       src_file=inps.file[ind_max],
                       SNWE=inps.SNWE,
                       lalo_step=inps.laloStep,
                       **kwargs)
    res_obj.open()
    res_obj.prepare()

    # resample input files one by one
    for infile in inps.file:
        print('-' * 50 + '\nresampling file: {}'.format(infile))
        atr = readfile.read_attribute(infile, datasetName=inps.dset)
        outfile = auto_output_filename(infile, inps)

        # update mode
        if inps.updateMode:
            print('update mode: ON')
            if ut.run_or_skip(outfile, in_file=[infile, inps.lookupFile]) == 'skip':
                continue

        ## prepare output
        # update metadata
        if inps.radar2geo:
            atr = attr.update_attribute4radar2geo(atr, res_obj=res_obj)
        else:
            atr = attr.update_attribute4geo2radar(atr, res_obj=res_obj)

        # instantiate output file
        file_is_hdf5 = os.path.splitext(outfile)[1] in ['.h5', '.he5']
        if file_is_hdf5:
            compression = readfile.get_hdf5_compression(infile)
            writefile.layout_hdf5(outfile, metadata=atr, ref_file=infile,
                                  compression=compression)
        else:
            dsDict = dict()

        ## run
        dsNames = readfile.get_dataset_list(infile, datasetName=inps.dset)
        maxDigit = max([len(i) for i in dsNames])
        for dsName in dsNames:
            if not file_is_hdf5:
                dsDict[dsName] = np.zeros((res_obj.length, res_obj.width))

            # loop for block-by-block IO
            for i in range(res_obj.num_box):
                src_box = res_obj.src_box_list[i]
                dest_box = res_obj.dest_box_list[i]

                # read
                print('-' * 50 + '\nreading {d:<{w}} in block {b} from {f} ...'.format(
                    d=dsName, w=maxDigit, b=src_box, f=os.path.basename(infile)))
                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=src_box,
                                     print_msg=False)[0]

                # resample
                data = res_obj.run_resample(src_data=data, box_ind=i)

                # write / save block data
                if data.ndim == 3:
                    block = [0, data.shape[0],
                             dest_box[1], dest_box[3],
                             dest_box[0], dest_box[2]]
                else:
                    block = [dest_box[1], dest_box[3],
                             dest_box[0], dest_box[2]]

                if file_is_hdf5:
                    print('write data in block {} to file: {}'.format(block, outfile))
                    writefile.write_hdf5_block(outfile,
                                               data=data,
                                               datasetName=dsName,
                                               block=block,
                                               print_msg=False)
                else:
                    dsDict[dsName][block[0]:block[1],
                                   block[2]:block[3]] = data

            # for binary file: ensure the same data type
            if not file_is_hdf5:
                dsDict[dsName] = np.array(dsDict[dsName], dtype=data.dtype)

        # write binary file
        if not file_is_hdf5:
            atr['BANDS'] = len(dsDict.keys())
            writefile.write(dsDict, out_file=outfile, metadata=atr, ref_file=infile)

            # create ISCE XML and GDAL VRT files if using an ISCE lookup table file
            if inps.latFile and inps.lonFile:
                writefile.write_isce_xml(atr, fname=outfile)

    m, s = divmod(time.time() - start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs.\n'.format(m, s))
    return outfile
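#############################################################################
# Usage sketch for run_geocode() (illustrative, not part of the original
# module): `inps` is the argparse-style namespace the function expects. The
# attribute names below are taken from the function body above; the file
# names and option values are hypothetical, and the real namespace may carry
# additional attributes consumed by auto_output_filename().
#
#   from argparse import Namespace
#   inps = Namespace(
#       file=['velocity.h5'],                   # input file(s) to geocode
#       dset=None,                              # dataset name, None for all
#       lookupFile='inputs/geometryRadar.h5',   # lookup table file
#       latFile=None, lonFile=None,             # ISCE-2 lat/lon files, if any
#       SNWE=None, laloStep=None,               # output bounding box / step
#       interpMethod='nearest', fillValue=np.nan,
#       nprocs=1, maxMemory=4, software='pyresample',
#       radar2geo=True, updateMode=False,
#   )
#   run_geocode(inps)
#############################################################################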
def subset_file(fname, subset_dict_input, out_file=None):
    """Subset file with
    Inputs:
        fname       : str, path/name of file
        out_file    : str, path/name of output file
        subset_dict : dict, subset parameters, including the following items:
                      subset_x   : list of 2 int,   subset in x direction,   default=None
                      subset_y   : list of 2 int,   subset in y direction,   default=None
                      subset_lat : list of 2 float, subset in lat direction, default=None
                      subset_lon : list of 2 float, subset in lon direction, default=None
                      fill_value : float, optional. fill value for the area outside of the
                                   data coverage. default=None
                                   None / not existed: subset within the data coverage only.
                      tight      : bool, tight subset or not, for lookup table file,
                                   i.e. geomap*.trans
    Outputs:
        out_file : str, path/name of output file;
                   out_file = 'sub_'+fname, if fname is in the current directory;
                   out_file = fname, if fname is not in the current directory.
    """
    # Input File Info
    atr = readfile.read_attribute(fname)
    width = int(atr['WIDTH'])
    length = int(atr['LENGTH'])
    k = atr['FILE_TYPE']
    print('subset ' + k + ' file: ' + fname + ' ...')

    subset_dict = subset_dict_input.copy()
    # Read Subset Inputs into 4-tuple box in pixel and geo coord
    pix_box, geo_box = subset_input_dict2box(subset_dict, atr)

    coord = ut.coordinate(atr)
    # if fill_value exists and is not None, subset data and fill the assigned value
    # for the area outside of its coverage;
    # otherwise, re-check the subset box to make sure it's within the data coverage
    # and initialize the matrix with np.nan
    if 'fill_value' in subset_dict.keys() and subset_dict['fill_value']:
        outfill = True
    else:
        outfill = False
    if not outfill:
        pix_box = coord.check_box_within_data_coverage(pix_box)
        subset_dict['fill_value'] = np.nan

    geo_box = coord.box_pixel2geo(pix_box)
    data_box = (0, 0, width, length)
    print('data   range in (x0,y0,x1,y1): {}'.format(data_box))
    print('subset range in (x0,y0,x1,y1): {}'.format(pix_box))
    print('data   range in (W, N, E, S): {}'.format(coord.box_pixel2geo(data_box)))
    print('subset range in (W, N, E, S): {}'.format(geo_box))

    if pix_box == data_box:
        print('Subset range == data coverage, no need to subset. Skip.')
        return fname

    # Calculate Subset/Overlap Index
    pix_box4data, pix_box4subset = get_box_overlap_index(data_box, pix_box)

    ###########################  Data Read and Write  ######################
    # Output File Name
    if not out_file:
        if os.getcwd() == os.path.dirname(os.path.abspath(fname)):
            if 'tight' in subset_dict.keys() and subset_dict['tight']:
                out_file = '{}_tight{}'.format(os.path.splitext(fname)[0],
                                               os.path.splitext(fname)[1])
            else:
                out_file = 'sub_' + os.path.basename(fname)
        else:
            out_file = os.path.basename(fname)
    print('writing >>> ' + out_file)

    # update metadata
    atr = attr.update_attribute4subset(atr, pix_box)

    # subset datasets one by one
    dsNames = readfile.get_dataset_list(fname)
    maxDigit = max([len(i) for i in dsNames])

    ext = os.path.splitext(out_file)[1]
    if ext in ['.h5', '.he5']:
        # initiate the output file
        writefile.layout_hdf5(out_file, metadata=atr, ref_file=fname)

        # subset dataset one-by-one
        for dsName in dsNames:
            with h5py.File(fname, 'r') as fi:
                ds = fi[dsName]
                ds_shape = ds.shape
                ds_ndim = ds.ndim
                print('cropping {d} in {b} from {f} ...'.format(
                    d=dsName, b=pix_box4data, f=os.path.basename(fname)))

                if ds_ndim == 2:
                    # read
                    data = ds[pix_box4data[1]:pix_box4data[3],
                              pix_box4data[0]:pix_box4data[2]]

                    # crop
                    data_out = np.ones((pix_box[3] - pix_box[1],
                                        pix_box[2] - pix_box[0]),
                                       data.dtype) * subset_dict['fill_value']
                    data_out[pix_box4subset[1]:pix_box4subset[3],
                             pix_box4subset[0]:pix_box4subset[2]] = data
                    data_out = np.array(data_out, dtype=data.dtype)

                    # write
                    block = [0, int(atr['LENGTH']), 0, int(atr['WIDTH'])]
                    writefile.write_hdf5_block(out_file,
                                               data=data_out,
                                               datasetName=dsName,
                                               block=block,
                                               print_msg=True)

                if ds_ndim == 3:
                    prog_bar = ptime.progressBar(maxValue=ds_shape[0])
                    for i in range(ds_shape[0]):
                        # read
                        data = ds[i,
                                  pix_box4data[1]:pix_box4data[3],
                                  pix_box4data[0]:pix_box4data[2]]

                        # crop
                        data_out = np.ones((1,
                                            pix_box[3] - pix_box[1],
                                            pix_box[2] - pix_box[0]),
                                           data.dtype) * subset_dict['fill_value']
                        data_out[:,
                                 pix_box4subset[1]:pix_box4subset[3],
                                 pix_box4subset[0]:pix_box4subset[2]] = data

                        # write
                        block = [i, i + 1,
                                 0, int(atr['LENGTH']),
                                 0, int(atr['WIDTH'])]
                        writefile.write_hdf5_block(out_file,
                                                   data=data_out,
                                                   datasetName=dsName,
                                                   block=block,
                                                   print_msg=False)

                        prog_bar.update(i + 1, suffix='{}/{}'.format(i + 1, ds_shape[0]))
                    prog_bar.close()
                    print('finished writing to file: {}'.format(out_file))

    else:
        # IO for binary files
        dsDict = dict()
        for dsName in dsNames:
            dsDict[dsName] = subset_dataset(fname,
                                            dsName,
                                            pix_box,
                                            pix_box4data,
                                            pix_box4subset,
                                            fill_value=subset_dict['fill_value'])
        writefile.write(dsDict, out_file=out_file, metadata=atr, ref_file=fname)

        # write extra metadata files for ISCE data files
        if os.path.isfile(fname + '.xml') or os.path.isfile(fname + '.aux.xml'):
            # write ISCE XML file
            dtype_gdal = readfile.NUMPY2GDAL_DATATYPE[atr['DATA_TYPE']]
            dtype_isce = readfile.GDAL2ISCE_DATATYPE[dtype_gdal]
            writefile.write_isce_xml(out_file,
                                     width=int(atr['WIDTH']),
                                     length=int(atr['LENGTH']),
                                     bands=len(dsDict.keys()),
                                     data_type=dtype_isce,
                                     scheme=atr['scheme'],
                                     image_type=atr['FILE_TYPE'])
            print(f'write file: {out_file}.xml')

            # write GDAL VRT file
            if os.path.isfile(fname + '.vrt'):
                from isceobj.Util.ImageUtil import ImageLib as IML
                img = IML.loadImage(out_file)[0]
                img.renderVRT()
                print(f'write file: {out_file}.vrt')

    return out_file
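#############################################################################
# Usage sketch for subset_file() (illustrative, not part of the original
# module): crop a hypothetical 'velocity.h5'; the dict keys follow the
# docstring above.
#
#   # subset by pixel range in the x/y direction
#   subset_file('velocity.h5', {'subset_x': [100, 500],
#                               'subset_y': [200, 800]})
#
#   # subset by lat/lon range; a truthy fill_value lets the box extend
#   # beyond the data coverage, filling the outside area with that value
#   subset_file('velocity.h5', {'subset_lat': [31.5, 32.5],
#                               'subset_lon': [130.0, 131.0],
#                               'fill_value': np.nan})
#############################################################################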
def multilook_file(infile, lks_y, lks_x, outfile=None, method='average',
                   margin=[0, 0, 0, 0], max_memory=4):
    """Multilook the input file.
    Parameters: infile     - str, path of input file to be multilooked.
                lks_y      - int, number of looks in y / row direction.
                lks_x      - int, number of looks in x / column direction.
                method     - str, multilook method: average or nearest.
                margin     - list of 4 int, number of pixels to be skipped during multilooking.
                             useful for offset products, where the marginal pixels are ignored
                             during cross correlation matching.
                max_memory - float, max memory to use in GB, for block-by-block HDF5 IO.
                outfile    - str, path of output file
    Returns:    outfile    - str, path of output file
    """
    lks_y = int(lks_y)
    lks_x = int(lks_x)

    # input file info
    atr = readfile.read_attribute(infile)
    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
    k = atr['FILE_TYPE']
    print('multilooking {} {} file: {}'.format(atr['PROCESSOR'], k, infile))
    print('number of looks in y / azimuth direction: %d' % lks_y)
    print('number of looks in x / range   direction: %d' % lks_x)
    print('multilook method: {}'.format(method))

    # margin --> box
    if margin != [0, 0, 0, 0]:   # top, bottom, left, right
        box = (margin[2], margin[0], width - margin[3], length - margin[1])
        print('number of pixels to skip in top/bottom/left/right boundaries: {}'.format(margin))
    else:
        box = (0, 0, width, length)

    # output file name
    ext = os.path.splitext(infile)[1]
    if not outfile:
        if os.getcwd() == os.path.dirname(os.path.abspath(infile)):
            outfile = os.path.splitext(infile)[0] + '_' + str(lks_y) + 'alks_' + str(lks_x) + 'rlks' + ext
        else:
            outfile = os.path.basename(infile)

    # update metadata
    atr = attr.update_attribute4multilook(atr, lks_y, lks_x, box=box)

    if ext in ['.h5', '.he5']:
        writefile.layout_hdf5(outfile, metadata=atr, ref_file=infile)

    # read source data and multilooking
    dsNames = readfile.get_dataset_list(infile)
    maxDigit = max([len(i) for i in dsNames])
    dsDict = dict()
    for dsName in dsNames:
        print('multilooking {d:<{w}} from {f} ...'.format(
            d=dsName, w=maxDigit, f=os.path.basename(infile)))

        # split in Y/row direction for IO, for HDF5 only
        if ext in ['.h5', '.he5']:
            # calc step size with memory usage up to max_memory GB
            with h5py.File(infile, 'r') as f:
                ds = f[dsName]
                ds_size = np.prod(ds.shape) * 4    # dataset size in bytes, assuming float32
            num_step = int(np.ceil(ds_size * 4 / (max_memory * 1024**3)))
            row_step = int(np.rint(length / num_step / 10) * 10)
            row_step = max(row_step, 10)
        else:
            row_step = box[3] - box[1]

        num_step = int(np.ceil((box[3] - box[1]) / (row_step * lks_y)))
        for i in range(num_step):
            r0 = box[1] + row_step * lks_y * i
            r1 = box[1] + row_step * lks_y * (i + 1)
            r1 = min(r1, box[3])
            # IO box
            box_i = (box[0], r0, box[2], r1)
            box_o = (int((box[0] - box[0]) / lks_x),
                     int((r0 - box[1]) / lks_y),
                     int((box[2] - box[0]) / lks_x),
                     int((r1 - box[1]) / lks_y))
            print('box: {}'.format(box_o))

            # read / multilook
            if method == 'nearest':
                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=box_i,
                                     xstep=lks_x,
                                     ystep=lks_y,
                                     print_msg=False)[0]
            else:
                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=box_i,
                                     print_msg=False)[0]
                data = multilook_data(data, lks_y, lks_x)

            # output block
            if data.ndim == 3:
                block = [0, data.shape[0],
                         box_o[1], box_o[3],
                         box_o[0], box_o[2]]
            else:
                block = [box_o[1], box_o[3],
                         box_o[0], box_o[2]]

            # write
            if ext in ['.h5', '.he5']:
                writefile.write_hdf5_block(outfile,
                                           data=data,
                                           datasetName=dsName,
                                           block=block,
                                           print_msg=False)
            else:
                dsDict[dsName] = data

    # for binary file with 2 bands, always use the BIL scheme
    if (len(dsDict.keys()) == 2
            and os.path.splitext(infile)[1] not in ['.h5', '.he5']
            and atr.get('scheme', 'BIL').upper() != 'BIL'):
        print('the input binary file has 2 bands with band interleave as: {}'.format(atr['scheme']))
        print('for the output binary file, change the band interleave to BIL as default.')
        atr['scheme'] = 'BIL'

    if ext not in ['.h5', '.he5']:
        writefile.write(dsDict, out_file=outfile, metadata=atr, ref_file=infile)

        # write extra metadata files for ISCE data files
        if os.path.isfile(infile + '.xml') or os.path.isfile(infile + '.aux.xml'):
            # write ISCE XML file
            dtype_gdal = readfile.NUMPY2GDAL_DATATYPE[atr['DATA_TYPE']]
            dtype_isce = readfile.GDAL2ISCE_DATATYPE[dtype_gdal]
            writefile.write_isce_xml(outfile,
                                     width=int(atr['WIDTH']),
                                     length=int(atr['LENGTH']),
                                     bands=len(dsDict.keys()),
                                     data_type=dtype_isce,
                                     scheme=atr['scheme'],
                                     image_type=atr['FILE_TYPE'])
            print(f'write file: {outfile}.xml')

            # write GDAL VRT file
            if os.path.isfile(infile + '.vrt'):
                from isceobj.Util.ImageUtil import ImageLib as IML
                img = IML.loadImage(outfile)[0]
                img.renderVRT()
                print(f'write file: {outfile}.vrt')

    return outfile
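#############################################################################
# Usage sketch for multilook_file() (illustrative, not part of the original
# module): the file names and look numbers are hypothetical.
#
#   # average 15 looks in azimuth (y) and 5 looks in range (x)
#   multilook_file('ifgramStack.h5', lks_y=15, lks_x=5)
#
#   # nearest-neighbor decimation instead of averaging, e.g. for
#   # mask-like or integer-valued datasets
#   multilook_file('maskConnComp.h5', lks_y=15, lks_x=5, method='nearest')
#############################################################################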