def main(args):
    delta.config.modules.register_all()
    try:
        parser = argparse.ArgumentParser(description='Save Images from Config')
        config.setup_arg_parser(parser, ['general', 'io', 'dataset'])
        parser.add_argument('output_dir', help='Directory to save output to.')
        options = parser.parse_args(args)
    except argparse.ArgumentError:
        parser.print_help(sys.stderr)
        sys.exit(1)

    config.initialize(options)

    os.makedirs(options.output_dir, exist_ok=True)
    images = config.dataset.images()
    for (i, name) in enumerate(images):
        img = images.load(i)
        path = os.path.join(options.output_dir,
                            os.path.splitext(os.path.basename(name))[0] + '.tiff')
        if os.path.exists(path):
            print(path + ' already exists, skipping.')
        else:
            print(name, '-->', path)
            tiff.write_tiff(path, image=img)
    return 0
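# Minimal entry-point sketch (an addition, not part of the original script):
# forward the command-line arguments, minus the program name, to main() and
# propagate its return code. The __main__ guard is assumed here.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))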
def original_file():
    tmpdir = tempfile.mkdtemp()
    image_path = os.path.join(tmpdir, 'image.tiff')
    label_path = os.path.join(tmpdir, 'label.tiff')
    (image, label) = generate_tile(width=32, height=64, blocks=50)
    tiff.write_tiff(image_path, image)
    tiff.write_tiff(label_path, label)
    yield (image_path, label_path)
    shutil.rmtree(tmpdir)
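# generate_tile() is called above but not defined in this section. A plausible
# sketch under the assumption that it returns a random float image plus a
# binary uint8 label marking `blocks` small square regions; the real helper
# may differ.
def generate_tile(width=32, height=32, blocks=50):
    image = np.random.random((height, width))
    label = np.zeros((height, width), dtype=np.uint8)
    for _ in range(blocks):
        # Mark a random 2x2 block as the positive class.
        y = np.random.randint(0, height - 2)
        x = np.random.randint(0, width - 2)
        label[y:y + 2, x:x + 2] = 1
    return (image, label)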
def incremental_tiff_filenames():
    tmpdir = tempfile.mkdtemp()
    image_path = os.path.join(tmpdir, 'image.tiff')
    label_path = os.path.join(tmpdir, 'label.tiff')
    image = np.reshape(np.arange(0, 1, 0.01), (10, 10))
    label = (image >= 0.5).astype(np.uint8)
    tiff.write_tiff(image_path, image)
    tiff.write_tiff(label_path, label)
    yield ([image_path], [label_path])
    shutil.rmtree(tmpdir)
def binary_identity_tiff_filenames():
    tmpdir = tempfile.mkdtemp()
    image_path = os.path.join(tmpdir, 'image.tiff')
    label_path = os.path.join(tmpdir, 'label.tiff')
    label = np.random.randint(0, 2, (128, 128), np.uint8) #pylint: disable=no-member
    image = np.take(np.asarray([[1.0, 0.0], [0.0, 1.0]]), label, axis=0)
    tiff.write_tiff(image_path, image)
    tiff.write_tiff(label_path, label)
    yield ([image_path], [label_path])
    shutil.rmtree(tmpdir)
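# The np.take call above one-hot encodes the label: indexing the 2x2 identity
# matrix by the label values yields, per pixel, row [1, 0] for class 0 and
# [0, 1] for class 1. A tiny worked check (illustrative only):
label_demo = np.array([[0, 1],
                       [1, 0]], dtype=np.uint8)
image_demo = np.take(np.eye(2), label_demo, axis=0)
# image_demo[0, 0] == [1., 0.] (class 0); image_demo[0, 1] == [0., 1.]
# (class 1). The class axis is appended last, giving shape (2, 2, 2).
assert image_demo.shape == (2, 2, 2)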
def doubling_tiff_filenames():
    tmpdir = tempfile.mkdtemp()
    image_path = os.path.join(tmpdir, 'image.tiff')
    label_path = os.path.join(tmpdir, 'label.tiff')
    image = np.random.random((128, 128)) #pylint: disable=no-member
    label = 2 * image
    tiff.write_tiff(image_path, image)
    tiff.write_tiff(label_path, label)
    yield ([image_path], [label_path])
    shutil.rmtree(tmpdir)
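# The four generators above follow the pytest fixture pattern (set up, yield,
# tear down). Assuming they are registered with @pytest.fixture, a test could
# consume one like this; the wrapper and test body are illustrative, not part
# of the original suite.
import pytest

@pytest.fixture
def doubling_data():
    yield from doubling_tiff_filenames()

def test_doubling_label_is_twice_image(doubling_data):
    (image_paths, label_paths) = doubling_data
    image = np.squeeze(tiff.TiffImage(image_paths[0]).read())
    label = np.squeeze(tiff.TiffImage(label_paths[0]).read())
    assert np.allclose(2 * image, label)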
def test_geotiff_write(tmpdir):
    ''' Tests writing a Landsat geotiff. '''
    HEIGHT = 3
    WIDTH = 5
    numpy_image = np.zeros((HEIGHT, WIDTH), dtype=np.uint8)
    numpy_image[0, :] = [0, 1, 2, 3, 4]
    numpy_image[2, :] = [10, 11, 12, 13, 14]

    filename = str(tmpdir / 'test.tiff')
    write_tiff(filename, numpy_image)

    img = TiffImage(filename)
    data = np.squeeze(img.read())
    assert numpy_image.shape == data.shape
    assert np.allclose(numpy_image, data)

    writer = TiffWriter(filename)
    writer.initialize((HEIGHT, WIDTH, 1), numpy_image.dtype)
    writer.write(numpy_image, 0, 0)
    writer.close()

    img = TiffImage(filename)
    data = np.squeeze(img.read())
    assert numpy_image.shape == data.shape
    assert np.allclose(numpy_image, data)
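# The TiffWriter path above can, in principle, write an image in several
# calls rather than one. A sketch under the assumption that the two trailing
# arguments of write() are the row and column of the block's top-left corner,
# as suggested by the write(numpy_image, 0, 0) call in the test; verify this
# against the actual TiffWriter API before relying on it.
def write_in_two_blocks(filename, numpy_image):
    height, width = numpy_image.shape
    writer = TiffWriter(filename)
    writer.initialize((height, width, 1), numpy_image.dtype)
    writer.write(numpy_image[:height // 2, :], 0, 0)            # top rows
    writer.write(numpy_image[height // 2:, :], height // 2, 0)  # bottom rows
    writer.close()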
def main(options):
    # TODO: Share the way this is done with ml/train.py
    cpuOnly = (config.general.gpus() == 0)

    if cpuOnly:
        with tf.device('/cpu:0'):
            model = tf.keras.models.load_model(options.model, custom_objects=custom_objects(), compile=False)
    else:
        model = tf.keras.models.load_model(options.model, custom_objects=custom_objects(), compile=False)

    colors = list(map(lambda x: x.color, config.dataset.classes))
    error_colors = np.array([[0x00, 0x00, 0x00],
                             [0xFF, 0x00, 0x00]], dtype=np.uint8)
    if options.noColormap:
        colors = None # forces raw one-channel output

    start_time = time.time()
    images = config.dataset.images()
    labels = config.dataset.labels()

    net_name = os.path.splitext(os.path.basename(options.model))[0]

    full_cm = None
    if options.autoencoder:
        labels = None
    for (i, path) in enumerate(images):
        image = images.load(i)
        base_name = os.path.splitext(os.path.basename(path))[0]
        writer = image_writer('tiff')
        prob_image = writer(net_name + '_' + base_name + '.tiff') if options.prob else None
        output_image = writer(net_name + '_' + base_name + '.tiff') if not options.prob else None
        error_image = None
        label = None
        if labels:
            error_image = writer('errors_' + base_name + '.tiff')
            label = labels.load(i)
            assert image.size() == label.size(), 'Image and label do not match.'

        ts = config.io.tile_size()
        if options.autoencoder:
            label = image
            predictor = predict.ImagePredictor(model, ts, output_image, True,
                                               None if options.noColormap else (ae_convert, np.uint8, 3))
        else:
            predictor = predict.LabelPredictor(model, ts, output_image, True, colormap=colors,
                                               prob_image=prob_image, error_image=error_image,
                                               error_colors=error_colors)

        overlap = (options.overlap, options.overlap)
        try:
            if cpuOnly:
                with tf.device('/cpu:0'):
                    predictor.predict(image, label, overlap=overlap)
            else:
                predictor.predict(image, label, overlap=overlap)
        except KeyboardInterrupt:
            print('\nAborted.')
            return 0

        if labels:
            cm = predictor.confusion_matrix()
            if full_cm is None:
                full_cm = np.copy(cm).astype(np.int64)
            else:
                full_cm += cm
            for j in range(cm.shape[0]):
                print('%s--- Precision: %.2f%%  Recall: %.2f%%  Pixels: %d / %d' % \
                      (config.dataset.classes[j].name,
                       100 * cm[j, j] / np.sum(cm[:, j]),
                       100 * cm[j, j] / np.sum(cm[j, :]),
                       int(np.sum(cm[j, :])), int(np.sum(cm))))
            print('%.2f%% Correct: %s' % (float(np.sum(np.diag(cm)) / np.sum(cm) * 100), path))
            save_confusion(cm, map(lambda x: x.name, config.dataset.classes),
                           'confusion_' + base_name + '.pdf')

        if options.autoencoder:
            write_tiff('orig_' + base_name + '.tiff',
                       image.read() if options.noColormap else ae_convert(image.read()),
                       metadata=image.metadata())

    stop_time = time.time()
    if labels:
        for i in range(full_cm.shape[0]):
            print('%s--- Precision: %.2f%%  Recall: %.2f%%  Pixels: %d / %d' % \
                  (config.dataset.classes[i].name,
                   100 * full_cm[i, i] / np.sum(full_cm[:, i]),
                   100 * full_cm[i, i] / np.sum(full_cm[i, :]),
                   int(np.sum(full_cm[i, :])), int(np.sum(full_cm))))
    print('Elapsed time = ', stop_time - start_time)
    return 0
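# ae_convert() is used above to map autoencoder output back to displayable
# pixels but is not defined in this section. A hypothetical stand-in (an
# assumption, not the project's definition) that rescales a float image to
# uint8 and keeps the first three bands as an RGB preview:
def ae_convert(data):
    scaled = np.clip(data * 255.0, 0.0, 255.0).astype(np.uint8)
    return scaled[:, :, :3] if scaled.ndim == 3 else scaled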
def chunk_images(mosaic=False):
    '''
    Divides input images in env var cropped_u8_dir into chips, written to env var
    cropped_u8_chip_dir. Takes env variables in section "## Variables- delta/image chips."
    Outputs geotiffs with correct geolocation info. Doesn't overwrite files by default,
    even if the file exists in a different batch dir. Outputs to cropped_u8_chip_dir, or
    to [cropped_u8_chip_dir]/batch[n] if 'batch' is not None.
    '''
    ## I/O
    if mosaic == False:
        # single tiles, in normal dir, e.g. /mnt/gcs/cropped-to-roi/uint8
        roi_image_list = glob(os.path.join(cropped_u8_dir, '*.tif'))
        print('Mosaic: False')
    else:
        # operate on mosaics only, in mosaic dir, e.g. /mnt/gcs/cropped-to-roi/uint8/mosaics
        roi_image_list = glob(os.path.join(cropped_u8_mosaic_dir, '*.tif'))
        print('Mosaic: True')

    print('Images:\n')
    pprint.pprint(roi_image_list)
    imageSet = imagery_config.ImageSet(roi_image_list, 'tiff')

    if batch is not None: # batch mode
        output_dir = os.path.join(cropped_u8_chip_dir, 'batch' + str(batch))
        print('Batch mode: True')
    else: # normal, no batching
        output_dir = cropped_u8_chip_dir
        print('Batch mode: False')
    os.makedirs(output_dir, exist_ok=True)
    # print(f'Outputting chips to {output_dir}')

    ## Loop
    for i, file_pth in enumerate(roi_image_list): # [:1] for testing
        print(f'\n{file_pth}')
        image = imageSet.load(i) # full ROI image

        ## Skip if commented out or not present in file_paths.yml.
        # Counting 40 characters from the left works for both naming schemes
        # (e.g. 'LC08_L2SP_012031_20201129_20210316_02_T1' or 'LC08_20170728_inuvik.tif';
        # the '.tif' extension doesn't matter bc mosaic names are shorter).
        ID = os.path.basename(file_pth)[:40]

        ## Weed out paths that shouldn't be mosaicked.
        if ID not in mosaic_names:
            # ID may be an actual Landsat ID (or a mosaic name that is commented out in the yml).
            if ID not in file_inputs['bases'].keys():
                # Must be a mosaic or scene name that is commented out (or misspelled).
                print(f'Skipping ID {ID} bc it is commented out or not in list.') # This order matters!
                continue
            elif (mosaic == False) and (file_inputs['bases'][ID]['for_mosaic'] == True):
                # This is an input to a mosaic file, so I shouldn't be running it. --OR--
                # ID is a commented-out mosaic name and it will return an error.
                print(f'Skipping ID {ID} for now, bc this is not the mosaicked version, and even '
                      f'though I\'m not in mosaic mode, this file is supposed to be mosaicked '
                      f'("for_mosaic" is True).')
                continue
            elif mosaic == True:
                # Unlikely to occur unless I make a filepath mistake.
                print(f'Skipping ID {ID} for now, bc this is NOT a mosaicked scene, but I\'m in mosaic mode.')
                continue
            else: # Good to go: continue on to chunk this scene.
                pass
        else:
            # ID is actually a mosaic name, the scene is not commented out, and the file is mosaicked.
            ## Find one of the corresponding Landsat IDs to use as a key.
            idx = mosaic_names.index(ID)
            ls_id = list(file_inputs['bases'].keys())[idx]
            if (mosaic == True) and (file_inputs['bases'][ls_id]['for_mosaic'] == True):
                # ID is a mosaic name and I'm in mosaic mode, plus I've double-checked that the
                # scene is supposed to be mosaicked: good to go!
                pass
            elif (mosaic == True) and (file_inputs['bases'][ls_id]['for_mosaic'] == False):
                # Same, except I've somehow messed up and not marked the file as for mosaic. Unlikely.
                raise RuntimeError(f'[EDK]: ID {ID} is a mosaic name, but "for_mosaic" is set to "False."')
            elif mosaic == False:
                # I found a scene that is mosaicked, but I'm not supposed to be processing it.
                # Unlikely, bc the mosaic file would have to be in the wrong dir.
                print(f'Skipping ID {ID} for now, bc this IS the mosaicked version and I\'m not in mosaic mode.')
                continue
            else:
                raise RuntimeError('[EDK]: Not sure what happened.')

        tiles = image.tiles(chip_shape, overlap_shape, partials=False,
                            partials_overlap=True) # indexes to tile bounds
        print(f'Max tiles to make: {len(tiles)}')
        skipOutputFlag = 0 # init
        emptyTileFlag = 0 # init
        for j in range(len(tiles)):
            tile_data = image.read(tiles[j])
            if np.all(tile_data == np.uint8(nodata_val)): # tile is empty!
                emptyTileFlag += 1
                progress_bar('', j / len(tiles), ' Empty image:')
                continue
            # pprint.pprint(pd.DataFrame({'Tiles': tiles})[:10])
            x = tiles[j].min_x
            y = tiles[j].min_y

            ## Decide if output already exists.
            basename = os.path.basename(file_pth)
            chip_out_fname = basename.replace('.tif', f'_y{y:04}x{x:04}.tif')
            chip_out_pth = os.path.join(output_dir, chip_out_fname)
            if os.path.exists(os.path.join(cropped_u8_chip_dir, chip_out_fname)) or \
                    len(glob(os.path.join(cropped_u8_chip_dir, 'batch*', chip_out_fname))) > 0:
                # Can't get glob recursive to work...
                skipOutputFlag += 1
                progress_bar('', j / len(tiles), ' Chip already exists:')
                continue

            ## Create new metadata for the chip.
            ## Explanation of the geotransform: https://gdal.org/tutorials/geotransforms_tut.html
            geotransform_out = list(image.metadata()['geotransform']) # recast to list so I can edit
            geotransform_out[0] = geotransform_out[0] + geotransform_out[1] * x
            geotransform_out[3] = geotransform_out[3] + geotransform_out[5] * y # note the non-symmetric order of the geotransform/affine matrix!
            metadata_chip = image.metadata() # re-initialize from pristine copy
            metadata_chip['geotransform'] = tuple(geotransform_out)

            ## Progress bar
            progress_bar('', j / len(tiles), ' Creating chips:')

            ## Output
            print(f'{j:>4} | ', end='')
            tiff.write_tiff(chip_out_pth, tile_data, nodata=nodata_val, metadata=metadata_chip)

        ## Testing
        if skipOutputFlag >= 1:
            if skipOutputFlag == j + 1 - emptyTileFlag:
                print(f'Skipping chunking image {i} b/c it already exists: {basename}')
            else:
                warnings.warn(f'Warning (EDK): finished chunking image {i} with {skipOutputFlag} '
                              f'chips already created and {j} chunks possible: {basename}')
                print(f'Created {j+1 - skipOutputFlag} chunks\n\n')
        else:
            print(f'Finished chunking image {i}: {basename}')
            print(f'Created {j+1} chunks\n\n')
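# progress_bar() is called throughout chunk_images() and combine_chunks() but
# is not defined in this section. A minimal stand-in with the signature
# inferred from the call sites (prefix, fraction in [0, 1], suffix); the real
# helper may render differently.
def progress_bar(prefix, fraction, suffix, width=40):
    filled = int(width * min(max(fraction, 0.0), 1.0))
    bar = '#' * filled + '-' * (width - filled)
    print(f'\r{prefix}[{bar}] {100 * fraction:5.1f}%{suffix}', end='', flush=True)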
def combine_chunks(scale=None):
    '''
    Parses the input chip directory to find common file basenames, then reconstructs one
    full ROI image for each unique basename. Uses env vars kernel and kernel_scale. Kernel
    can be None, 'gaussian', or 'rect', where 'rect' is boolean with zeros along the edge.
    Doesn't overwrite output. The optional 'scale' param gives the Gaussian kernel std
    [default: env var kernel_scale].

    If testing, look for '# testing-change'. Then change env var sr_dir to point to the
    chip dir, and set chip_shape = sr_chip_shape.
    '''
    ## Avoid overloading var kernel_scale unless specified in the function call
    ## (allows running multiple times for different scales).
    if scale is None:
        scale = kernel_scale # read from env var, unless specified

    ## I/O
    chip_image_list = glob(os.path.join(sr_dir, '*.png')) # cropped_u8_chip_dir for testing # HERE change to tiff if needed # testing-change
    # combined_dir = os.path.join(sr_dir, 'combined_scale_' + str(scale))

    ## Variables- SR. Placed here, not in env22.py, because the value can be overwritten
    ## by the function argument.
    combined_dir = f'/mnt/gcs/sr/v2/{upscale_ratio}x/overlap_{overlap_shape[0]}/batch{batch}/combined_scale_{kernel_scale}/'
    os.makedirs(combined_dir, exist_ok=True)

    ## Since filename lengths are now uneven, parse the basename by counting '_'
    ## separators, not by length.
    chip_basenames = []
    for name in chip_image_list:
        name_base = os.path.basename(name)
        sep_idx = [i.start() for i in re.finditer('_', name_base)] # separator indexes
        chip_basenames.append(name_base[:sep_idx[-2]]) # works for both mosaic and orig paths
    unq_chip_basenames = np.unique(chip_basenames) # basenames of orig ROIs, e.g. LC08_L2SP_012031_20201129_20210316_02_T1_SR_B534_C
    print(f'Unique basenames:\n{unq_chip_basenames}')

    ## Loop
    for i, basename in enumerate(unq_chip_basenames): # [:1] HERE change for testing. 1:2 is Lincoln, :1 is Cumberland, 2:3 is Redberry, 3:4 is St. Denis
        ## Parse basenames and stitching indexes.
        combined_out_name = str(basename) + '_sr_' + str(sr_res) + 'm.tif'
        combined_out_pth = os.path.join(combined_dir, combined_out_name) # path to write reconstructed image

        ## Skip if commented out or not present in file_paths.yml.
        ID = str(basename)[:40] # hopefully counting characters from the left works here too
        if ID + '.tif' not in mosaic_names: # ID may be an actual Landsat ID
            if ID not in file_inputs['bases'].keys(): # ID must be commented out
                print(f'Skipping ID {ID + ".tif"} bc it is not in input file yml list.')
                continue
            else: # ID is an actual Landsat ID
                pass

        ## Prevent overwriting:
        if os.path.exists(combined_out_pth) or len(
                glob(os.path.join(f'/mnt/gcs/sr/v[0-9]/10x/overlap_{overlap_shape[0]}/batch[0-9]/combined_scale_{kernel_scale}',
                                  combined_out_name),
                     recursive=True)) > 0: # comment out to either skip or not # search all batch dirs
            ## Skip outputs that exist.
            print(f'Skipping output {i} bc it exists somewhere: {basename}')
            continue
            ## Overwrite outputs
            # print(f'Overwriting output ({i}) : {basename}')
        else:
            print(f'Combining chunks from image {i}: {basename}')

        chips = glob(os.path.join(sr_dir, basename + '*.png')) # use cropped_u8_chip_dir for testing on non-SR # testing-change
        chip_origins_y = [int(os.path.basename(name).split('_')[-2][1:5]) for name in chips]
        chip_origins_x = [int(os.path.basename(name).split('_')[-2][6:10]) for name in chips]
        y_max = max(chip_origins_y)
        x_max = max(chip_origins_x)

        ## Assert correct number of tiles. (Doesn't make sense now that blank chips are thrown out.)
        metadata, georef_h, georef_w = lookup_georef_nodataMask(basename, useNodataMask=False)
        reconstructed_yDim = y_max * upscale_ratio + sr_chip_shape[0]
        reconstructed_xDim = x_max * upscale_ratio + sr_chip_shape[0]
        georef_sr_h = georef_h * upscale_ratio
        georef_sr_w = georef_w * upscale_ratio
        if (georef_sr_h, georef_sr_w) != (reconstructed_yDim, reconstructed_xDim):
            print(f'Error [EDK]: georeferenced mask {georef_sr_h, georef_sr_w} is a different shape '
                  f'in the x-y dim. than array {reconstructed_yDim, reconstructed_xDim}!')
            print(f'\tBasename: {str(basename)}')
            print(f'\tBase dir: {sr_dir}')
            print('Skipping this file.')
            continue

        ## Init output array. All arrays beginning with C have the dims of the final image
        ## shape (large); arrays beginning with K have the dims of the kernel (much smaller).
        # "Combined" array with dimensions parsed from max chip indexes, as float for
        # division. Accumulates chips multiplied by K (kernel). # testing-change
        C = np.zeros((reconstructed_yDim, reconstructed_xDim, 3), dtype='float32')

        ## Create 3-D kernels with chip shape and full-size sum arrays of ROI image shape.
        if kernel == 'gaussian':
            # std of 48/4.8 = 10 # good default: scale = float(sr_chip_shape[0])/4.8 # testing-change
            K = np.repeat(gkern(sr_chip_shape[0], scale)[:, :, np.newaxis], 3, axis=2).astype('float32')
        elif kernel == 'rect':
            # start all true- keep base kernel, [but add ability to modify to set some sides == 1] # testing-change
            K = np.zeros((sr_chip_shape[0], sr_chip_shape[1], 3), dtype='float32')
            K[kernRectBounds[0]:-kernRectBounds[0], kernRectBounds[1]:-kernRectBounds[1], :] = 1
        elif kernel is None: # replace, not add, chips
            K = np.ones((sr_chip_shape[0], sr_chip_shape[1], 3), dtype='float32') # testing-change
        else:
            raise RuntimeError('EDK: Undefined kernel')
        CKS = C.copy() # "C kernel sum"; init as float for division

        ## Loop over chips
        for j, chip_pth in enumerate(chips): # chips[:30]
            ## Progress bar
            progress_bar('', j / len(chips), f'Combining chips: ({j}/{len(chips)})')

            ## Create delta TiffImage, load and read().
            chip_image = tiff.TiffImage(chip_pth)
            chip = chip_image.read() # + np.random.randint(0, 30) # TESTING: rand adds noise for testing. Beware overflow for uint8.

            ## Assert dtype, nodata val, and dims (verify hard-coded assumptions against env variables).
            if True: # j == 0:
                assert chip.dtype == 'uint8', 'EDK: Check dtype.'
                assert chip.shape[:2] == sr_chip_shape, 'EDK: chip shape.' # testing-change
                assert chip_image.nodata_value() == nodata_val, 'EDK: Check no-data value.' # [STILL NEED TO TEST THIS AFTER COMMENTING OUT] TODO: temporarily commented out

            ## Save metadata for output if first chip.
            if j == 0:
                metadata_combined = chip_image.metadata() # re-initialize from pristine copy

            ## Add chip to output array, C, weighted by kernel.
            C[chip_origins_y[j] * upscale_ratio:chip_origins_y[j] * upscale_ratio + sr_chip_shape[0],
              chip_origins_x[j] * upscale_ratio:chip_origins_x[j] * upscale_ratio + sr_chip_shape[1], :] += chip * K
            ## Sum kernels
            CKS[chip_origins_y[j] * upscale_ratio:chip_origins_y[j] * upscale_ratio + sr_chip_shape[0],
                chip_origins_x[j] * upscale_ratio:chip_origins_x[j] * upscale_ratio + sr_chip_shape[1], :] += K

        ## Division for the weighted average. # TODO: add a small value in case CKS has any zeros (for division)
        combined_out = C / CKS # weighted image divided by sum of kernels (note that CKS can equal 0 in regions of image fill, and potentially due to underflow error)
        del C, CKS # save mem

        ## Make room for the nodata value (don't trust any zeros in the SR image; only
        ## trust the resampled mask from the original image).
        combined_out[combined_out > 254] = 254 # avoid ambiguity with over/underflow once converted to uint8
        combined_out = combined_out.astype('uint8')
        combined_out += 1 # int shift to free up the zero value for nodata

        ## Check if georeferencing needs to be looked up (this works around
        ## https://github.com/nasa/delta/issues/148).
        if metadata_combined['projection'] == '':
            # Note that this function returns three values by default, but fewer if
            # specified in the argument call. See the lookup_georef_nodataMask def.
            metadata, nodataMask = lookup_georef_nodataMask(basename)
        else:
            _, nodataMask = lookup_georef_nodataMask(basename)
            metadata = metadata_combined
            ## Raise an error bc a code snippet to rescale the image affine transform is
            ## needed here. Should implement as a function to avoid re-writing code.
            raise RuntimeError('[EDK] Caution: using metadata from first image chip.')

        ## Apply the upscaled nodata mask from the LR image.
        assert nodataMask.shape[:2] == combined_out.shape[:2], \
            f'[EDK]: mask {nodataMask.shape[:2]} is a different shape in the x-y dim. than array {combined_out.shape[:2]}!'
        combined_out[nodataMask] = nodata_val # appears soft-coded, but nodata_val=0 is really hard-coded in, because all uint8 pixel values were shifted up by 1

        ## Write out to combined_dir.
        tiff.write_tiff(combined_out_pth, combined_out, nodata=nodata_val, metadata=metadata)
        print(f'Combined {j+1} chunks\n\n')

        ## Check/update the projection if none exists.
        from osgeo import gdal
        ds = gdal.Open(combined_out_pth)
        if ds.GetProjection() == '':
            ds.SetProjection(metadata['projection']) # CRS.from_string(metadata["projection"]).to_wkt()
        ds.FlushCache() # close file/buffer
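# gkern() supplies the 'gaussian' blending kernel above but is not defined in
# this section. A common pure-numpy sketch matching the gkern(size, std) call
# (an assumed implementation; the normalization cancels in the C / CKS
# division, so scaling the peak to 1 is a free choice):
def gkern(kernlen, nsig):
    x = np.linspace(-(kernlen - 1) / 2.0, (kernlen - 1) / 2.0, kernlen)
    gauss = np.exp(-0.5 * (x / nsig) ** 2)   # 1-D Gaussian samples
    kernel = np.outer(gauss, gauss)          # separable 2-D kernel
    return kernel / kernel.max()             # weight 1 at the chip center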