Load data
Filter areas where we have LiDAR estimates
Subsample if desired/required
#-------------------------------------------------------------------------------
"""
print('Loading data')
# load a template raster
#template_file = '%s/lidar/processed/%s_AGB_07-31-19_regridded.tif' % (path2data,site_id)
# Hard-coded absolute path to the LiDAR-derived median AGB raster that serves
# as the regression target.
lidar_agb_file = '/exports/csce/datastore/geos/users/dmilodow/FOREST2020/LiDARupscaling/data/lidar_calibration/kiuic_lidar_agb_median.tif'
# NOTE(review): option=1 semantics are defined in the project's io module —
# confirm what this flag controls.
lidar = io.load_geotiff(lidar_agb_file, option=1)
target = lidar.values.copy()
# Negative AGB values are nodata/invalid in this raster; convert them to NaN
# so they are excluded by the isfinite() training mask below.
target[target < 0] = np.nan

# Load predictors & target
# NOTE(review): predictors are loaded three times (combined, Sentinel-2-only,
# ALOS-only) and data_mask from the combined call is overwritten by the
# ALOS-only call — presumably the masks are identical; verify against
# io.load_predictors.
data_layers_all, data_mask, labels_all = io.load_predictors(
    layers=['sentinel2', 'alos'])
data_layers_s2, temp, labels_s2 = io.load_predictors(layers=['sentinel2'])
data_layers_alos, data_mask, labels_alos = io.load_predictors(layers=['alos'])

# load forest mask
forest_mask_file = "/exports/csce/datastore/geos/groups/gcel/YucatanBiomass/data/forest_mask/%s_forest_mask_20m.tif" % site_id
# First band of the mask raster; pixels equal to 1 are forest.
forest = xr.open_rasterio(forest_mask_file).values[0]
forest_mask = forest == 1
# Restrict the forest mask to pixels with valid predictor data
# (boolean * boolean acts as logical AND).
forest_mask = forest_mask * data_mask

# Keep only areas for which we have biomass estimates
training_mask = np.isfinite(target)
# Erode the LiDAR-coverage mask by one pixel to drop edge pixels whose
# values may be unreliable at the coverage boundary.
training_mask = image.binary_erosion(training_mask, iterations=1)
# Train only on forested pixels with valid predictors AND a LiDAR estimate.
training_mask = training_mask * forest_mask

# Apply masks to the predictor dataset to be ingested into sklearn routines
Load data
Filter areas where we have LiDAR estimates
Subsample if desired/required
#-------------------------------------------------------------------------------
"""
print('Loading data')
# load a template raster
# Relative path to the LiDAR AGB raster for the requested resolution and
# trial version; resolution is zero-padded to three digits to match the
# directory naming convention (e.g. '020m').
lidar_agb_file = '../data/lidar_calibration/%sm/kiuic_lidar_agb_%s_median.tif' % (
    resolution.zfill(3), version_trials)
# NOTE(review): option=1 semantics are defined in the project's io module —
# confirm what this flag controls.
lidar = io.load_geotiff(lidar_agb_file, option=1)
target = lidar.values.copy()
# Negative AGB values are nodata/invalid; convert to NaN so downstream
# finite-value masks exclude them.
target[target < 0] = np.nan

# Load predictors & target
data_layers, data_mask, labels = io.load_predictors(
    layers=['sentinel2', 'alos'], resolution=resolution)

# Optional removal of predictor layers by substring match against their
# labels (previous configurations kept as commented-out examples; the
# current run removes nothing).
#layers_to_remove = ['ASM','homogeneity','correlation']
#layers_to_remove = ['ASM','homogeneity','correlation','contrast','dissimilarity']
layers_to_remove = []

# Build a boolean keep-mask over the first (layer) axis of data_layers and
# a matching filtered label list.
n_predictors = data_layers.shape[0]
layer_mask = np.ones(n_predictors, dtype='bool')
labels_update = []
for ii, lab in enumerate(labels):
    for layer in layers_to_remove:
        if layer in lab:
            print('remove', lab)
            layer_mask[ii] = False
    # Keep the label only if no removal pattern matched this layer.
    if layer_mask[ii]:
        labels_update.append(lab)
data_layers = data_layers[layer_mask]
labels = labels_update
""" Project Info """ site_id = 'kiuic' version = '001' path2alg = '../saved_models/' """ #=============================================================================== PART A: LOAD IN DATA AND SUBSET THE TRAINING DATA Load data Filter areas where we have LiDAR estimates Subsample if desired/required #------------------------------------------------------------------------------- """ # Load predictors & target predictors, target, landmask, labels = io.load_predictors() # Keep only areas for which we have biomass estimates mask = np.isfinite(target) X = predictors[mask, :] y = target[mask] """ #=============================================================================== PART B: CAL-VAL Cal-val Cal-val figures Importances via permutation importance #------------------------------------------------------------------------------- """ X_train, X_test, y_train, y_test = train_test_split(X, y,