def cir_image(self, overwrite):
    """
    Creates a CIR image for every image in ``self.img_list``.

    For each image the spectral bands are stacked with ``tif_stacker``, bands
    5, 4 and 3 of ``spectra_stack.tif`` are read (named nir, red and green
    here), min-max normalized, combined into one 3-channel image, enhanced
    (contrast, sharpness, brightness) and saved as
    ``<data_path>/band_combos/<img>_cir_img.png``.

    Parameters
    ----------
    overwrite : bool
        If False, an image whose CIR file already exists is skipped.
        Any other value regenerates the file.
    """
    def normalize(array):
        """Normalizes numpy arrays into scale 0.0 - 1.0"""
        array_min, array_max = np.nanmin(array), np.nanmax(array)
        return (array - array_min) / (array_max - array_min)

    plt.ioff()
    data_path = self.data_path
    band_combo_dir = data_path / 'band_combos'
    band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']

    for img in self.img_list:
        print('Creating CIR image for {}'.format(img))
        cir_file = band_combo_dir / (img + '_cir_img' + '.png')
        band_combo_dir.mkdir(parents=True, exist_ok=True)

        if overwrite is False:
            if cir_file.exists():
                print('CIR image already exists for ' + img)
                continue
            print('No CIR image for ' + img + ', creating one')

        # Build the spectral stack (tif_stacker is asked not to overwrite)
        print('Stacking image')
        tif_stacker(data_path, img, band_list, features=False, overwrite=False)
        spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif'

        print('Processing CIR image')
        with rasterio.open(spectra_stack_path, 'r') as f:
            nir, red, green = f.read(5), f.read(4), f.read(3)

        # -999999 marks pixels to ignore; NaN keeps them out of nanmin/nanmax
        for band in (nir, red, green):
            band[band == -999999] = np.nan

        cir = np.dstack((normalize(nir), normalize(red), normalize(green)))

        # Convert to PIL image, enhance, and save.
        # NaN has no defined uint8 value, so map it to 0 before casting.
        cir_img = Image.fromarray(np.nan_to_num(cir * 255).astype(np.uint8))
        cir_img = ImageEnhance.Contrast(cir_img).enhance(1.5)
        cir_img = ImageEnhance.Sharpness(cir_img).enhance(2)
        cir_img = ImageEnhance.Brightness(cir_img).enhance(2)

        print('Saving CIR image')
        cir_img.save(cir_file, dpi=(300, 300))
def training_bnn(img_list, pctls, feat_list_new, data_path, batch, **model_params):
    """
    Trains and saves one aleatoric-uncertainty model per image and cloud cover.

    For every image the feature stack and cloud masks are generated, then for
    every value in ``pctls`` the training data is preprocessed, flood labels
    over permanent water (``GSW_perm == 1``) are set to 0, the ``GSW_perm``
    column is dropped and a model is obtained from
    ``get_aleatoric_uncertainty_model``. Each model is saved to
    ``<data_path>/<batch>/models/<img>/<img>_clouds_<pctl>.h5`` and the
    per-image training times are written to
    ``<data_path>/<batch>/metrics/training/<img>/training_times.csv``.

    Parameters
    ----------
    img_list : iterable of str
        Image names.
    pctls : sequence
        Cloud cover percentages to train on.
    feat_list_new : list
        Feature names passed to ``tif_stacker`` and ``preprocessing``.
    data_path : pathlib.Path
        Root data directory.
    batch : str
        Name of the batch sub-directory for models and metrics.
    **model_params
        Forwarded to ``get_aleatoric_uncertainty_model``.
    """
    for img in img_list:
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        model_dir = data_path / batch / 'models' / img
        img_metrics_dir = data_path / batch / 'metrics' / 'training' / img

        for pctl in pctls:
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            tf.keras.backend.clear_session()
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            # Remove flood water that is perm water
            data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0
            # Remove perm water column
            data_vector_train = np.delete(data_vector_train, perm_index, axis=1)
            # The last remaining column is used as the label
            X_train, y_train = data_vector_train[:, :-1], data_vector_train[:, -1]
            y_train = to_categorical(y_train)
            D = len(set(y_train[:, 0]))  # Target classes

            run_name = img + '_clouds_' + str(pctl)
            metrics_path = img_metrics_dir / run_name
            # Create each directory on its own: with both calls inside one
            # try/except, an existing metrics directory would skip creating
            # the model directory.
            metrics_path.mkdir(parents=True, exist_ok=True)
            model_dir.mkdir(parents=True, exist_ok=True)
            model_path = model_dir / (run_name + '.h5')

            print('Training model')
            start_time = time.time()
            aleatoric_model = get_aleatoric_uncertainty_model(X_train, y_train, **model_params, D=D)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            aleatoric_model.save(model_path)

        # Written next to the per-pctl metric folders; the directory is created
        # here as well so that an empty ``pctls`` still yields a (header-only) file.
        img_metrics_dir.mkdir(parents=True, exist_ok=True)
        times = [float(t) for t in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times, columns=['cloud_cover', 'training_time'])
        times_df.to_csv(img_metrics_dir / 'training_times.csv', index=False)
def log_reg_training(img_list, pctls, feat_list_new, data_path, batch):
    """
    Fits and saves a logistic regression per image and cloud cover percentage.

    The ``GSW_perm`` column is dropped from the preprocessed training vector
    and the last remaining column is used as the label. Models go to
    ``<data_path>/<batch>/models/<img>/<img>_clouds_<pctl>.sav`` and the
    training times of each image to
    ``<data_path>/<batch>/metrics/training/<img>/training_times.csv``.
    """
    for img in img_list:
        print(img + ': stacking tif, generating clouds')
        tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False)
        cloud_generator(img, data_path, overwrite=False)
        times = []

        for pctl in pctls:
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            preprocessed = preprocessing(data_path, img, pctl, feat_list_new, test=False)
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessed

            perm_index = feat_keep.index('GSW_perm')
            # Only needed by the disabled relabelling step below; the lookup
            # still raises ValueError when 'flooded' is missing.
            flood_index = feat_keep.index('flooded')
            # data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0
            without_perm = np.delete(data_vector_train, perm_index, axis=1)
            X_train = without_perm[:, :-1]
            y_train = without_perm[:, -1]

            run_name = img + '_clouds_' + str(pctl)
            model_dir = data_path / batch / 'models' / img
            metrics_path = data_path / batch / 'metrics' / 'training' / img / run_name
            for directory in (model_dir, metrics_path):
                if not directory.exists():
                    directory.mkdir(parents=True)
            model_path = model_dir / (run_name + '.sav')

            print('Training')
            start_time = time.time()
            logreg = LogisticRegression(n_jobs=-1, solver='sag')
            logreg.fit(X_train, y_train)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            joblib.dump(logreg, model_path)

        # One CSV per image, stored beside the per-pctl metric folders
        training_dir = metrics_path.parent
        times_table = np.column_stack([pctls, list(map(float, times))])
        times_df = pd.DataFrame(times_table, columns=['cloud_cover', 'training_time'])
        times_df.to_csv(training_dir / 'training_times.csv', index=False)
def cir_image(self):
    """
    Creates a CIR image for every image in ``self.img_list``.

    Bands 5, 4 and 3 of each image's ``spectra_stack.tif`` (named nir, red and
    green here) are min-max normalized, combined into one 3-channel image,
    enhanced and saved as ``<data_path>/<batch>/plots/<img>/cir_img.png``.
    """
    def normalize(array):
        """Normalizes numpy arrays into scale 0.0 - 1.0"""
        array_min, array_max = np.nanmin(array), np.nanmax(array)
        return (array - array_min) / (array_max - array_min)

    plt.ioff()
    data_path = self.data_path
    band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']

    for img in self.img_list:
        print('Creating CIR image for {}'.format(img))
        plot_path = data_path / self.batch / 'plots' / img
        # Saving fails if the plot directory is missing, so make sure it exists
        plot_path.mkdir(parents=True, exist_ok=True)

        # Build the spectral stack (tif_stacker is asked not to overwrite)
        print('Stacking image')
        tif_stacker(data_path, img, band_list, features=False, overwrite=False)
        spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif'

        print('Processing CIR image')
        with rasterio.open(spectra_stack_path, 'r') as f:
            nir, red, green = f.read(5), f.read(4), f.read(3)

        # -999999 marks pixels to ignore; NaN keeps them out of nanmin/nanmax
        for band in (nir, red, green):
            band[band == -999999] = np.nan

        cir = np.dstack((normalize(nir), normalize(red), normalize(green)))

        # Convert to PIL image, enhance, and save.
        # NaN has no defined uint8 value, so map it to 0 before casting.
        cir_img = Image.fromarray(np.nan_to_num(cir * 255).astype(np.uint8))
        cir_img = ImageEnhance.Contrast(cir_img).enhance(1.5)
        cir_img = ImageEnhance.Sharpness(cir_img).enhance(2)
        cir_img = ImageEnhance.Brightness(cir_img).enhance(2)

        print('Saving CIR image')
        cir_file = plot_path / 'cir_img.png'
        cir_img.save(cir_file, dpi=(300, 300))
def rgb_image(self, percent, overwrite):
    """
    Creates a linearly stretched RGB image for every image in ``self.img_list``.

    Bands 4, 3 and 2 of each image's ``spectra_stack.tif`` (named red, green
    and blue here) are stacked, contrast-stretched between two percentiles and
    saved as ``<data_path>/band_combos/<img>_rgb_img.png``.

    Parameters
    ----------
    percent : float
        Intensities are rescaled between the ``percent``-th and the
        ``(100 - percent)``-th percentile of the non-NaN values.
    overwrite : bool
        If False, an image whose RGB file already exists is skipped.
    """
    def linear_stretch(image, percent):
        """Rescales intensities to the [percent, 100 - percent] percentile range"""
        p_low, p_high = np.percentile(image[~np.isnan(image)], (percent, 100 - percent))
        return exposure.rescale_intensity(image, in_range=(p_low, p_high))

    # Bind the data root from the instance, as the sibling methods do;
    # without this the name is undefined inside the method.
    data_path = self.data_path
    band_combo_dir = data_path / 'band_combos'
    band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']

    for img in self.img_list:
        spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif'
        rgb_file = band_combo_dir / (img + '_rgb_img' + '.png')
        band_combo_dir.mkdir(parents=True, exist_ok=True)

        if overwrite is False:
            if rgb_file.exists():
                print('RGB image already exists for ' + img)
                continue
            print('No RGB image for ' + img + ', creating one')

        print('Stacking RGB image')
        tif_stacker(data_path, img, band_list, features=False, overwrite=False)

        print('Processing RGB image')
        with rasterio.open(spectra_stack_path, 'r') as f:
            red, green, blue = f.read(4), f.read(3), f.read(2)

        # -999999 marks pixels to ignore; NaN keeps them out of the percentiles
        for band in (red, green, blue):
            band[band == -999999] = np.nan

        rgb = linear_stretch(np.dstack((red, green, blue)), percent)

        # NaN has no defined uint8 value, so map it to 0 before casting
        rgb_img = Image.fromarray(np.nan_to_num(rgb * 255).astype(np.uint8))

        print('Saving RGB image')
        rgb_img.save(rgb_file, dpi=(300, 300))
def false_map(self):
    """
    Creates map of FP/FNs overlaid on RGB image

    For every image an enhanced RGB base image is saved to
    ``<data_path>/<batch>/plots/<img>/rgb_img.png``. Then, for every cloud
    cover percentage in ``self.pctls`` and every value in
    ``self.buffer_iters``, the predictions stored under the key
    ``<pctl>_buff_<buffer_iter>`` in ``predictions.h5`` are compared with the
    actual flood labels and two files are written to the plot directory:

    * ``false_map<pctl>_buff_<buffer_iter>.png`` - comparison image with
      channels red = actual, blue = predicted, green = minimum of both.
    * ``false_map_overlay<pctl>_buff_<buffer_iter>.png`` - the comparison
      image (black pixels made transparent) pasted over the RGB base image.
    """
    def normalize(array):
        """Normalizes numpy arrays into scale 0.0 - 1.0"""
        array_min, array_max = np.nanmin(array), np.nanmax(array)
        return (array - array_min) / (array_max - array_min)

    plt.ioff()
    data_path = self.data_path
    band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']

    for img in self.img_list:
        print('Creating FN/FP map for {}'.format(img))
        plot_path = data_path / self.batch / 'plots' / img
        bin_file = data_path / self.batch / 'predictions' / img / 'predictions.h5'
        stack_path = data_path / 'images' / img / 'stack' / 'stack.tif'

        # Get RGB image
        print('Stacking RGB image')
        tif_stacker(data_path, img, band_list, features=False, overwrite=False)
        spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif'

        print('Processing RGB image')
        with rasterio.open(spectra_stack_path, 'r') as f:
            red, green, blue = f.read(4), f.read(3), f.read(2)

        # -999999 marks pixels to ignore; NaN keeps them out of nanmin/nanmax
        for band in (red, green, blue):
            band[band == -999999] = np.nan

        rgb = np.dstack((normalize(red), normalize(green), normalize(blue)))

        # Convert to PIL image, enhance, and save.
        # NaN has no defined uint8 value, so map it to 0 before casting.
        rgb_img = Image.fromarray(np.nan_to_num(rgb * 255).astype(np.uint8))
        rgb_img = ImageEnhance.Contrast(rgb_img).enhance(1.5)
        rgb_img = ImageEnhance.Sharpness(rgb_img).enhance(2)
        rgb_img = ImageEnhance.Brightness(rgb_img).enhance(2)

        print('Saving RGB image')
        rgb_file = plot_path / 'rgb_img.png'
        rgb_img.save(rgb_file, dpi=(300, 300))

        # Reshape predicted values back into image band
        with rasterio.open(stack_path, 'r') as ds:
            shape = ds.read(1).shape  # Shape of full original image

        for pctl in self.pctls:
            data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                data_path, img, pctl, self.feat_list_new, test=True)

            # Everything below up to the buffer loop depends on pctl only,
            # so it is computed once instead of once per buffer iteration.
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            # Remove flood water that is perm water
            data_vector_test[data_vector_test[:, perm_index] == 1, flood_index] = 0

            # presumably data_ind_test is a (row_indices, col_indices) pair - confirm
            rows, cols = zip(data_ind_test)

            with rasterio.open(stack_path, 'r') as ds:
                perm_feat = ds.read(perm_index + 1)

            # Add actual flood values (last column) to cloud-covered pixel positions
            flooded_img = np.full(shape, np.nan)
            flooded_img[rows, cols] = data_vector_test[:, -1]

            for buffer_iter in self.buffer_iters:
                print('Fetching flood predictions for buffer', buffer_iter, 'at', str(pctl) + '%')
                # Read predictions
                pred_name = str(pctl) + '_buff_' + str(buffer_iter)
                with h5py.File(bin_file, 'r') as f:
                    predictions = np.array(f[pred_name])  # Copy h5 dataset to array

                # Add predicted values to cloud-covered pixel positions
                prediction_img = np.full(shape, np.nan)
                prediction_img[rows, cols] = predictions
                # Remove perm water from predictions
                prediction_img[perm_feat == 1] = 0

                # Visualizing FNs/FPs
                red_actual = flooded_img  # Actual
                blue_preds = prediction_img  # Predictions
                green_combo = np.minimum(red_actual, blue_preds)

                # Saving FN/FP comparison image
                comparison_img = np.dstack((red_actual, green_combo, blue_preds))
                comparison_img_file = plot_path / (
                    'false_map' + str(pctl) + '_buff_' + str(buffer_iter) + '.png')
                print('Saving FN/FP image for buffer', str(buffer_iter), 'at', str(pctl) + '%')
                matplotlib.image.imsave(comparison_img_file, comparison_img, dpi=300)

                # Load comparison image and convert black pixels to transparent
                # so it can overlay the RGB image (vectorised instead of a
                # Python loop over every pixel)
                overlay_pixels = np.array(Image.open(comparison_img_file).convert('RGBA'))
                is_black = (overlay_pixels[:, :, :3] == 0).all(axis=2)
                overlay_pixels[is_black] = (255, 255, 255, 0)
                flood_overlay = Image.fromarray(overlay_pixels)

                # Superimpose on a copy of the base image: pasting onto rgb_img
                # itself would carry this overlay into every later iteration.
                overlay_img = rgb_img.copy()
                overlay_img.paste(flood_overlay, (0, 0), flood_overlay)
                print('Saving overlay image for buffer', str(buffer_iter), 'at', str(pctl) + '%')
                overlay_img.save(
                    plot_path / ('false_map_overlay' + str(pctl) + '_buff_' + str(buffer_iter) + '.png'),
                    dpi=(300, 300))
                plt.close('all')
def log_reg_training_buffer(img_list, pctls, feat_list_new, data_path, batch,
                            buffer_iters, buffer_flood_only):
    """
    Fits a logistic regression per image, cloud cover and buffer size.

    The flood band of the training image is grown with ``binary_dilation``
    (``iterations=buffer_iter``); every pixel outside the grown region is set
    to NaN and thereby dropped from the training vector. Flood labels over
    permanent water (``GSW_perm == 1``) are zeroed before the dilation when
    ``buffer_flood_only`` is truthy, otherwise after it. Models are saved to
    ``<data_path>/<batch>/models/<img>/<img>_clouds_<pctl>buff<iter>.sav`` and
    the training times of each image to
    ``<data_path>/<batch>/metrics/training/<img>/training_times.csv``.
    """
    # Only referenced by the disabled debug dump further down
    from imageio import imwrite

    for img in img_list:
        print(img + ': stacking tif, generating clouds')
        tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False)
        cloud_generator(img, data_path, overwrite=False)
        times = []

        for pctl in pctls:
            print('Preprocessing')
            data_train_full, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)

            for buffer_iter in buffer_iters:
                perm_index = feat_keep.index('GSW_perm')
                flood_index = feat_keep.index('flooded')
                data_train = data_train_full.copy()
                over_perm_water = data_train[:, :, perm_index] == 1

                if buffer_flood_only:
                    data_train[over_perm_water, flood_index] = 0
                dilated = binary_dilation(data_train[:, :, flood_index], iterations=buffer_iter)
                buffer_mask = np.invert(dilated)
                if not buffer_flood_only:
                    data_train[over_perm_water, flood_index] = 0

                # Blank out everything outside the buffer, then flatten to rows
                data_train[buffer_mask] = np.nan
                n_pixels = data_train.shape[0] * data_train.shape[1]
                pixel_rows = data_train.reshape([n_pixels, data_train.shape[2]])
                has_nan = np.isnan(pixel_rows).any(axis=1)
                pixel_rows = pixel_rows[~has_nan]
                # Remove perm water column
                data_vector_train = np.delete(pixel_rows, perm_index, axis=1)
                X_train = data_vector_train[:, :-1]
                y_train = data_vector_train[:, -1]

                model_dir = data_path / batch / 'models' / img
                metrics_path = (data_path / batch / 'metrics' / 'training' / img
                                / (img + '_clouds_' + str(pctl)))
                for directory in (model_dir, metrics_path):
                    if not directory.exists():
                        directory.mkdir(parents=True)
                model_path = model_dir / (
                    img + '_clouds_' + str(pctl) + 'buff' + str(buffer_iter) + '.sav')

                # Save data flooding image to check that buffering is working correctly
                # imwrite(model_path.parents[0] / '{}'.format('buff' + str(buffer_iter) + '.jpg'),
                #         data_train[:, :, 6])

                print('Training')
                start_time = time.time()
                logreg = LogisticRegression(n_jobs=-1, solver='sag')
                logreg.fit(X_train, y_train)
                end_time = time.time()
                times.append(timer(start_time, end_time, False))
                joblib.dump(logreg, model_path)

        # Rows follow the loop order: pctl outer, buffer_iter inner
        training_dir = metrics_path.parent
        cloud_cover_col = np.repeat(pctls, len(buffer_iters))
        buffer_col = np.tile(buffer_iters, len(pctls))
        seconds_col = list(map(float, times))
        times_df = pd.DataFrame(
            np.column_stack([cloud_cover_col, buffer_col, seconds_col]),
            columns=['cloud_cover', 'buffer_iters', 'training_time'])
        times_df.to_csv(training_dir / 'training_times.csv', index=False)
# tf.config.experimental.set_visible_devices(NUM_PARALLEL_EXEC_UNITS, 'CPU') os.environ["OMP_NUM_THREADS"] = str(NUM_PARALLEL_EXEC_UNITS) os.environ["KMP_BLOCKTIME"] = "30" os.environ["KMP_SETTINGS"] = "1" os.environ["KMP_AFFINITY"] = "granularity=fine,verbose,compact,1,0" # ====================================================================================================================== img = img_list[0] pctl = 30 batch = 'test' import statsmodels.api as sm print(img + ': stacking tif, generating clouds') times = [] tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False) cloud_generator(img, data_path, overwrite=False) print(img, pctl, '% CLOUD COVER') print('Preprocessing') tf.keras.backend.clear_session() data_train, data_vector_train, data_ind_train, feat_keep = preprocessing( data_path, img, pctl, feat_list_new, test=False) perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0 # Remove flood water that is perm water data_vector_train = np.delete(data_vector_train, perm_index, axis=1) # Remove perm water column shape = data_vector_train.shape X_train, y_train = data_vector_train[:, 0:shape[1] -
def NN_training(img_list, pctls, model_func, feat_list_new, data_path, batch, **model_params):
    """
    Trains one neural network per image and cloud cover percentage.

    For each image/percentage pair:

    1. The ``GSW_perm`` column is dropped from the preprocessed training
       vector; the last remaining column is used as the label (flood labels
       over permanent water are left unchanged).
    2. A single epoch is run with ``LrRangeFinder`` and ``lr_plots`` returns
       the learning-rate bounds plus the recorded learning rates and losses.
       These are written to ``.../metrics/training/lr_vals/<img>/losses_<pctl>.csv``.
    3. A fresh model is trained with an ``SGDRScheduler`` between the bounds;
       ``ModelCheckpoint`` saves it to
       ``<data_path>/<batch>/models/<img>/<img>_clouds_<pctl>.h5``.

    Per image, ``training_times.csv`` and ``lr_vals/<img>.csv`` (lr_min,
    lr_max and their mean for each percentage) are written afterwards.

    Parameters
    ----------
    img_list : iterable of str
        Image names.
    pctls : sequence
        Cloud cover percentages to train on.
    model_func : callable
        Called with the number of input features; returns a model exposing
        ``fit``.
    feat_list_new : list
        Feature names passed to ``tif_stacker`` and ``preprocessing``.
    data_path : pathlib.Path
        Root data directory.
    batch : str
        Name of the batch sub-directory for models and metrics.
    **model_params
        Forwarded to ``model.fit`` for the full training run; must contain
        ``batch_size``, which is also used for the learning-rate search.
    """
    get_model = model_func
    training_root = data_path / batch / 'metrics' / 'training'
    lr_plots_path = training_root / 'lr_plots'
    lr_vals_path = training_root / 'lr_vals'

    for img in img_list:
        print(img + ': stacking tif, generating clouds')
        times = []
        lr_mins = []
        lr_maxes = []
        tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        model_dir = data_path / batch / 'models' / img
        img_metrics_dir = training_root / img

        for pctl in pctls:
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            tf.keras.backend.clear_session()
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)
            perm_index = feat_keep.index('GSW_perm')
            # Remove perm water column
            data_vector_train = np.delete(data_vector_train, perm_index, axis=1)
            X_train, y_train = data_vector_train[:, :-1], data_vector_train[:, -1]
            INPUT_DIMS = X_train.shape[1]

            run_name = img + '_clouds_' + str(pctl)
            metrics_path = img_metrics_dir / run_name
            # Create every directory on its own: inside one shared try/except
            # the first FileExistsError would skip the remaining mkdir calls.
            for directory in (metrics_path, model_dir, lr_plots_path, lr_vals_path):
                directory.mkdir(parents=True, exist_ok=True)

            # ---------------------------------------------------------------------------------------------------
            # Determine learning rate by finding max loss decrease during single epoch training
            lrRangeFinder = LrRangeFinder(start_lr=0.1, end_lr=2)
            lr_model_params = {
                'batch_size': model_params['batch_size'],
                'epochs': 1,
                'verbose': 2,
                'callbacks': [lrRangeFinder],
                'use_multiprocessing': True
            }

            model = get_model(INPUT_DIMS)
            print('Finding learning rate')
            model.fit(X_train, y_train, **lr_model_params)
            lr_min, lr_max, lr, losses = lr_plots(lrRangeFinder, lr_plots_path, img, pctl)
            lr_mins.append(lr_min)
            lr_maxes.append(lr_max)

            # The file name carries the percentage, so store the curve for
            # every percentage rather than only for the last one.
            losses_path = lr_vals_path / img / ('losses_' + str(pctl) + '.csv')
            losses_path.parent.mkdir(parents=True, exist_ok=True)
            lr_losses = pd.DataFrame(np.column_stack([lr, losses]), columns=['lr', 'losses'])
            lr_losses.to_csv(losses_path, index=False)

            # ---------------------------------------------------------------------------------------------------
            # Training the model with cyclical learning rate scheduler
            model_path = model_dir / (run_name + '.h5')
            scheduler = SGDRScheduler(min_lr=lr_min, max_lr=lr_max, lr_decay=0.9,
                                      cycle_length=3, mult_factor=1.5)

            callbacks = [
                tf.keras.callbacks.EarlyStopping(
                    monitor='sparse_categorical_accuracy', min_delta=0.0001, patience=10),
                tf.keras.callbacks.ModelCheckpoint(filepath=str(model_path), monitor='loss',
                                                   save_best_only=True),
                CSVLogger(metrics_path / 'training_log.log'),
                scheduler
            ]

            model = get_model(INPUT_DIMS)

            print('Training full model with best LR')
            start_time = time.time()
            model.fit(X_train, y_train, **model_params, callbacks=callbacks)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            # The model file is written by the ModelCheckpoint callback

        # Created here as well so an empty ``pctls`` still yields header-only files
        img_metrics_dir.mkdir(parents=True, exist_ok=True)
        lr_vals_path.mkdir(parents=True, exist_ok=True)

        times = [float(t) for t in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times, columns=['cloud_cover', 'training_time'])
        times_df.to_csv(img_metrics_dir / 'training_times.csv', index=False)

        lr_range = np.column_stack([pctls, lr_mins, lr_maxes])
        # Columns 1 and 2 hold lr_min and lr_max; a 1:2 slice would average lr_min alone
        lr_avg = np.mean(lr_range[:, 1:3], axis=1)
        lr_range = np.column_stack([lr_range, lr_avg])
        lr_range_df = pd.DataFrame(
            lr_range, columns=['cloud_cover', 'lr_min', 'lr_max', 'lr_avg'])
        lr_range_df.to_csv((lr_vals_path / img).with_suffix('.csv'), index=False)
def training2(img_list, pctls, model_func, feat_list_new, data_path, batch,
              DROPOUT_RATE=0, HOLDOUT=0.3, **model_params):
    """
    Trains one model per image and cloud cover percentage with a validation split.

    Removes flood water that is permanent water: flood labels where
    ``GSW_perm == 1`` are set to 0 and the ``GSW_perm`` column is dropped.
    The remaining data is split by ``train_val`` and the last column is used
    as the label. Models are saved to
    ``<data_path>/<batch>/models/<img>/<img>_clouds_<pctl>.h5``; a Keras CSV
    log is written per run and ``training_times.csv`` per image.

    Parameters
    ----------
    img_list : iterable of str
        Image names.
    pctls : sequence
        Cloud cover percentages to train on.
    model_func : callable
        Called with the number of input features; returns a model exposing
        ``fit`` and ``save``.
    feat_list_new : list
        Feature names; must contain 'GSW_perm' and 'flooded'.
    data_path : pathlib.Path
        Root data directory.
    batch : str
        Name of the batch sub-directory for models and metrics.
    DROPOUT_RATE : float, optional
        Not used by this function.
    HOLDOUT : float, optional
        Passed to ``train_val`` as ``holdout``.
    **model_params
        Forwarded to ``model.fit``. An optional ``callbacks`` list is
        extended with a per-run ``CSVLogger`` without being modified.
    """
    get_model = model_func
    for img in img_list:
        times = []
        tif_stacker(data_path, img, feat_list_new, features=True, overwrite=True)
        cloud_generator(img, data_path, overwrite=False)

        model_dir = data_path / batch / 'models' / img
        img_metrics_dir = data_path / batch / 'metrics' / 'training' / img

        for pctl in pctls:
            data_train, data_vector_train, data_ind_train = preprocessing(
                data_path, img, pctl, gaps=False)
            perm_index = feat_list_new.index('GSW_perm')
            flood_index = feat_list_new.index('flooded')
            # Remove flood water that is perm water
            data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0
            # Remove perm water column
            data_vector_train = np.delete(data_vector_train, perm_index, axis=1)
            training_data, validation_data = train_val(data_vector_train, holdout=HOLDOUT)
            # Last column is the label, whatever the number of features
            X_train, y_train = training_data[:, :-1], training_data[:, -1]
            X_val, y_val = validation_data[:, :-1], validation_data[:, -1]
            INPUT_DIMS = X_train.shape[1]

            run_name = img + '_clouds_' + str(pctl)
            metrics_path = img_metrics_dir / run_name
            # Create each directory on its own: inside one shared try/except an
            # existing metrics directory would skip creating the model directory.
            metrics_path.mkdir(parents=True, exist_ok=True)
            model_dir.mkdir(parents=True, exist_ok=True)
            model_path = model_dir / (run_name + '.h5')

            # Build a fresh callback list for this run. Appending to the list in
            # model_params would keep every earlier CSVLogger attached to all
            # later runs and would also modify the caller's list.
            csv_logger = CSVLogger(metrics_path / 'training_log.log')
            fit_params = dict(model_params)
            fit_params['callbacks'] = list(model_params.get('callbacks') or []) + [csv_logger]

            print('~~~~~', img, pctl, '% CLOUD COVER')

            model = get_model(INPUT_DIMS)

            start_time = time.time()
            model.fit(X_train, y_train, **fit_params, validation_data=(X_val, y_val))
            end_time = time.time()
            times.append(timer(start_time, end_time, False))

            model.save(model_path)

        # Created here as well so an empty ``pctls`` still yields a header-only file
        img_metrics_dir.mkdir(parents=True, exist_ok=True)
        times = [float(t) for t in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times, columns=['cloud_cover', 'training_time'])
        times_df.to_csv(img_metrics_dir / 'training_times.csv', index=False)
def training6(img_list, pctls, model_func, feat_list_new, data_path, batch, T,
              dropout_rate=0.2, **model_params):
    """
    Trains one model per image and cloud cover percentage via ``model_func``.

    1. Sets the flood label to 0 wherever ``GSW_perm == 1``, then drops the
       ``GSW_perm`` column.
    2. One-hot encodes the label (last column) with ``to_categorical``.
    3. No validation set for training.
    4. Calls ``model_func`` with the epochs, the data, ``T``, ``D=2``, the
       batch size, the dropout rate and the callbacks (early stopping, model
       checkpoint, CSV log). The call is what gets timed - presumably it
       builds and fits the model; confirm against ``model_func``.

    The ``ModelCheckpoint`` callback targets
    ``<data_path>/<batch>/models/<img>/<img>_clouds_<pctl>.h5`` and the
    per-image training times are written to
    ``<data_path>/<batch>/metrics/training/<img>/training_times.csv``.

    Parameters
    ----------
    img_list : iterable of str
        Image names.
    pctls : sequence
        Cloud cover percentages to train on.
    model_func : callable
        Model factory/trainer, called as described in step 4.
    feat_list_new : list
        Feature names passed to ``tif_stacker`` and ``preprocessing``.
    data_path : pathlib.Path
        Root data directory.
    batch : str
        Name of the batch sub-directory for models and metrics.
    T
        Passed through to ``model_func`` unchanged.
    dropout_rate : float, optional
        Passed through to ``model_func``.
    **model_params
        Must contain ``epochs`` and ``batch_size``.
    """
    get_model = model_func
    for img in img_list:
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        model_dir = data_path / batch / 'models' / img
        img_metrics_dir = data_path / batch / 'metrics' / 'training' / img

        for pctl in pctls:
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            tf.keras.backend.clear_session()
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, gaps=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            # Remove flood water that is perm water
            data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0
            # Remove perm water column
            data_vector_train = np.delete(data_vector_train, perm_index, axis=1)
            X_train, y_train = data_vector_train[:, :-1], data_vector_train[:, -1]
            y_train = to_categorical(y_train)

            run_name = img + '_clouds_' + str(pctl)
            metrics_path = img_metrics_dir / run_name
            # Create each directory on its own: inside one shared try/except an
            # existing metrics directory would skip creating the model directory.
            metrics_path.mkdir(parents=True, exist_ok=True)
            model_dir.mkdir(parents=True, exist_ok=True)
            model_path = model_dir / (run_name + '.h5')

            callbacks = [
                tf.keras.callbacks.EarlyStopping(
                    monitor='softmax_output_categorical_accuracy', min_delta=0.005, patience=5),
                tf.keras.callbacks.ModelCheckpoint(filepath=str(model_path), monitor='loss',
                                                   save_best_only=True),
                CSVLogger(metrics_path / 'training_log.log')
            ]

            start_time = time.time()
            model = get_model(model_params['epochs'], X_train, y_train, X_train.shape, T, D=2,
                              batch_size=model_params['batch_size'], dropout_rate=dropout_rate,
                              callbacks=callbacks)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            # The model file is written by the ModelCheckpoint callback

        # Created here as well so an empty ``pctls`` still yields a header-only file
        img_metrics_dir.mkdir(parents=True, exist_ok=True)
        times = [float(t) for t in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times, columns=['cloud_cover', 'training_time'])
        times_df.to_csv(img_metrics_dir / 'training_times.csv', index=False)
def rf_training(img_list, pctls, feat_list_new, data_path, batch, n_jobs, param_path=None):
    """
    Trains and saves a random forest per image and cloud cover percentage.

    Flood labels over permanent water (``GSW_perm == 1``) are set to 0, the
    ``GSW_perm`` column is dropped and the last remaining column is used as
    the label. Hyper-parameters come from a stored skopt result whose ``x``
    holds, in order, ``max_leaf_nodes``, ``n_estimators`` and ``max_depth``.
    Models are saved to ``<data_path>/<batch>/models/<img>/<img>_clouds_<pctl>.sav``
    and the per-image training times to
    ``<data_path>/<batch>/metrics/training/<img>/training_times.csv``.

    Parameters
    ----------
    img_list : iterable of str
        Image names.
    pctls : sequence
        Cloud cover percentages to train on.
    feat_list_new : list
        Feature names passed to ``tif_stacker`` and ``preprocessing``.
    data_path : pathlib.Path
        Root data directory.
    batch : str
        Name of the batch sub-directory for models and metrics.
    n_jobs : int
        Not used by the current implementation; the forest is always built
        with ``n_jobs=-1``.
    param_path : pathlib.Path, optional
        File holding the skopt result. Defaults to the result stored for
        image ``4514_LC08_027033_20170826_1`` at 50% cloud cover inside
        ``<data_path>/<batch>/models``.
    """
    if param_path is None:
        param_path = (data_path / batch / 'models' / '4514_LC08_027033_20170826_1'
                      / '4514_LC08_027033_20170826_1_clouds_50params.pkl')
    res_rf = None  # loaded on first use, then shared by all runs

    for img in img_list:
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        model_dir = data_path / batch / 'models' / img
        img_metrics_dir = data_path / batch / 'metrics' / 'training' / img

        for pctl in pctls:
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            tf.keras.backend.clear_session()
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            # Remove flood water that is perm water
            data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0
            # Remove perm water column
            data_vector_train = np.delete(data_vector_train, perm_index, axis=1)
            X_train, y_train = data_vector_train[:, :-1], data_vector_train[:, -1]

            run_name = img + '_clouds_' + str(pctl)
            metrics_path = img_metrics_dir / run_name
            # Create each directory on its own: inside one shared try/except an
            # existing metrics directory would skip creating the model directory.
            metrics_path.mkdir(parents=True, exist_ok=True)
            model_dir.mkdir(parents=True, exist_ok=True)
            model_path = model_dir / (run_name + '.sav')

            # The hyper-parameter search itself is not run here; its stored
            # result is reused for every image and percentage.
            # NOTE(review): skopt.utils.load presumably unpickles the file -
            # only point param_path at files you trust.
            if res_rf is None:
                res_rf = skopt.utils.load(param_path)

            # Training
            print('Training with optimized hyperparameters')
            start_time = time.time()
            rf = RandomForestClassifier(random_state=0,
                                        max_leaf_nodes=res_rf.x[0],
                                        n_estimators=res_rf.x[1],
                                        max_depth=res_rf.x[2],
                                        n_jobs=-1)
            rf.fit(X_train, y_train)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            joblib.dump(rf, model_path)

        # Created here as well so an empty ``pctls`` still yields a header-only file
        img_metrics_dir.mkdir(parents=True, exist_ok=True)
        times = [float(t) for t in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times, columns=['cloud_cover', 'training_time'])
        times_df.to_csv(img_metrics_dir / 'training_times.csv', index=False)
def log_reg_training_sample(img_list, pctls, feat_list_new, feat_list_all, data_path, batch,
                            n_flood, n_nonflood):
    """
    Fits a logistic regression on sampled pixels per image and cloud cover.

    Sample coordinates come from ``get_sample_coords``; the sampled data is
    standardized with ``standardize_data`` and the returned scaler is stored
    in ``<data_path>/scalers/<img>/<img>_clouds_<pctl>_scaler_.sav``. The
    second-to-last band is dropped as the permanent water column and the last
    remaining column is used as the label. Models go to
    ``<data_path>/<batch>/models/<img>/<img>_clouds_<pctl>.sav`` and the
    per-image training times to
    ``<data_path>/<batch>/metrics/training/<img>/training_times.csv``.
    """
    for img in img_list:
        print(img + ': stacking tif, generating clouds')
        tif_stacker(data_path, img, feat_list_new, overwrite=False)
        cloud_generator(img, data_path, overwrite=False)
        times = []

        for pctl in pctls:
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            sample_coords, data_train = get_sample_coords(img, pctl, n_flood, n_nonflood)
            n_bands = data_train.shape[2]
            perm_index = n_bands - 2
            sampled = get_sample_data(sample_coords, data_train)
            sampled, scaler = standardize_data(sampled)
            # Remove perm water column
            data_vector_train = np.delete(sampled, perm_index, axis=1)
            X_train = data_vector_train[:, :-1]
            y_train = data_vector_train[:, -1]

            run_name = img + '_clouds_' + str(pctl)
            model_dir = data_path / batch / 'models' / img
            metrics_path = data_path / batch / 'metrics' / 'training' / img / run_name
            scaler_dir = data_path / 'scalers' / img
            for directory in (model_dir, metrics_path, scaler_dir):
                if not directory.exists():
                    directory.mkdir(parents=True)

            model_path = model_dir / (run_name + '.sav')
            scaler_path = scaler_dir / '{}_clouds_{}_scaler_.sav'.format(img, str(pctl))
            joblib.dump(scaler, scaler_path)

            print('Training')
            start_time = time.time()
            logreg = LogisticRegression(solver='lbfgs')
            logreg.fit(X_train, y_train)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            joblib.dump(logreg, model_path)

            # Drop the large objects before the next percentage is processed
            del data_train, sampled, data_vector_train, logreg

        # One CSV per image, stored beside the per-pctl metric folders
        training_dir = metrics_path.parent
        times_table = np.column_stack([pctls, list(map(float, times))])
        times_df = pd.DataFrame(times_table, columns=['cloud_cover', 'training_time'])
        times_df.to_csv(training_dir / 'training_times.csv', index=False)
# Plot uncertainty and FP/FN import matplotlib.pyplot as plt import rasterio from PIL import Image, ImageEnhance print('Creating FN/FP map for {}'.format(img)) plot_path = data_path / batch / 'plots' / img bin_file = data_path / batch / 'predictions' / img / 'predictions.h5' stack_path = data_path / 'images' / img / 'stack' / 'stack.tif' # Get RGB image print('Stacking RGB image') band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7'] tif_stacker(data_path, img, band_list, features=False, overwrite=False) spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif' # Function to normalize the grid values def normalize(array): """Normalizes numpy arrays into scale 0.0 - 1.0""" array_min, array_max = np.nanmin(array), np.nanmax(array) return ((array - array_min) / (array_max - array_min)) print('Processing RGB image') with rasterio.open(spectra_stack_path, 'r') as f: red, green, blue = f.read(4), f.read(3), f.read(2) red[red == -999999] = np.nan green[green == -999999] = np.nan
def false_map(probs, data_path, save=True):
    """
    Creates map of FP/FNs overlaid on RGB image

    Relies on the module-level names ``img_list``, ``pctls``, ``batch`` and
    ``feat_list_new``. For every image an enhanced RGB base image is saved to
    ``<data_path>/<batch>/plots/<img>/rgb_img.png``; for every cloud cover
    percentage the predictions stored under ``str(pctl)`` in
    ``predictions.h5`` are compared with the actual flood labels and written
    as a transparent RGBA overlay ``false_map<pctl>.png`` (red = actual,
    blue = predicted, green = minimum of both).

    Parameters
    ----------
    probs : bool
        If true, the stored dataset is treated as per-class scores and
        reduced with ``argmax`` over axis 1; otherwise it is used as the
        predictions directly.
    data_path : pathlib.Path
        Root data directory.
    save : bool
        If true, saves RGB FP/FN overlay image. If false, just saves FP/FN overlay
    """
    def normalize(array):
        """Normalizes numpy arrays into scale 0.0 - 1.0"""
        array_min, array_max = np.nanmin(array), np.nanmax(array)
        return (array - array_min) / (array_max - array_min)

    plt.ioff()
    band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']

    for img in img_list:
        print('Creating FN/FP map for {}'.format(img))
        plot_path = data_path / batch / 'plots' / img
        bin_file = data_path / batch / 'predictions' / img / 'predictions.h5'
        stack_path = data_path / 'images' / img / 'stack' / 'stack.tif'

        # Get RGB image
        print('Stacking RGB image')
        tif_stacker(data_path, img, band_list, features=False, overwrite=False)
        spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif'

        print('Processing RGB image')
        with rasterio.open(spectra_stack_path, 'r') as f:
            red, green, blue = f.read(4), f.read(3), f.read(2)

        # -999999 marks pixels to ignore; NaN keeps them out of nanmin/nanmax
        for band in (red, green, blue):
            band[band == -999999] = np.nan

        rgb = np.dstack((normalize(red), normalize(green), normalize(blue)))

        # Convert to PIL image, enhance, and save.
        # NaN has no defined uint8 value, so map it to 0 before casting.
        rgb_img = Image.fromarray(np.nan_to_num(rgb * 255).astype(np.uint8))
        rgb_img = ImageEnhance.Contrast(rgb_img).enhance(1.5)
        rgb_img = ImageEnhance.Sharpness(rgb_img).enhance(2)
        rgb_img = ImageEnhance.Brightness(rgb_img).enhance(2)

        print('Saving RGB image')
        rgb_file = plot_path / 'rgb_img.png'
        rgb_img.save(rgb_file, dpi=(300, 300))

        # Reshape predicted values back into image band
        with rasterio.open(stack_path, 'r') as ds:
            shape = ds.read(1).shape  # Shape of full original image

        for pctl in pctls:
            print('Fetching flood predictions for', str(pctl) + '%')
            # Read predictions
            with h5py.File(bin_file, 'r') as f:
                predictions = np.array(f[str(pctl)])  # Copy h5 dataset to array
            if probs:
                predictions = np.argmax(predictions, axis=1)

            data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=True)

            # Add predicted values to cloud-covered pixel positions
            # presumably data_ind_test is a (row_indices, col_indices) pair - confirm
            rows, cols = zip(data_ind_test)
            prediction_img = np.full(shape, np.nan)
            prediction_img[rows, cols] = predictions

            # Remove perm water from predictions and actual
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            # Remove flood water that is perm water
            data_vector_test[data_vector_test[:, perm_index] == 1, flood_index] = 0
            with rasterio.open(stack_path, 'r') as ds:
                perm_feat = ds.read(perm_index + 1)
            prediction_img[perm_feat == 1] = 0

            # Add actual flood values (last column) to cloud-covered pixel positions
            flooded_img = np.full(shape, np.nan)
            flooded_img[rows, cols] = data_vector_test[:, -1]

            # Visualizing FNs/FPs
            red_actual = flooded_img  # Actual
            blue_preds = prediction_img  # Predictions
            green_combo = np.minimum(red_actual, blue_preds)

            # Build a real 8-bit RGBA array. Handing a float array to
            # Image.fromarray with mode='RGBA' reinterprets its raw bytes
            # instead of converting the values.
            fpfn_rgb = np.nan_to_num(np.dstack((red_actual, green_combo, blue_preds)))
            fpfn_rgb = (np.clip(fpfn_rgb, 0, 1) * 255).astype(np.uint8)
            # Black pixels and pixels without a value (NaN) become fully
            # transparent so the overlay only shows flagged pixels
            visible = fpfn_rgb.any(axis=2)
            alphas = np.where(visible, 255, 0).astype(np.uint8)
            fpfn_img = np.dstack((fpfn_rgb, alphas))

            fpfn_overlay_file = plot_path / ('false_map' + str(pctl) + '.png')
            fpfn_overlay = Image.fromarray(fpfn_img)
            fpfn_overlay.save(fpfn_overlay_file, dpi=(300, 300))

            # Superimpose comparison image and RGB image, then save and close
            if save:
                # Paste onto a copy: pasting onto rgb_img itself would carry
                # this overlay into the images of all later percentages.
                overlay_img = rgb_img.copy()
                overlay_img.paste(fpfn_overlay, (0, 0), fpfn_overlay)
                print('Saving overlay image for', str(pctl) + '%')
                overlay_img.save(
                    plot_path / ('false_map_overlay' + str(pctl) + '.png'),
                    dpi=(300, 300))
            plt.close('all')