def test_regression(self):
    """Fit single- and multi-target random forest regressors on the
    meuse heavy-metal data and predict rasters from both."""
    predictor_files = [
        os.path.join(meuse_dir, fname)
        for fname in os.listdir(meuse_dir)
        if fname.endswith('.tif')
    ]
    stack = Raster(predictor_files)
    self.assertEqual(stack.count, 21)

    training_pt = gpd.read_file(os.path.join(meuse_dir, 'meuse.shp'))
    training = stack.extract_vector(response=training_pt, field='cadmium')
    # pull the remaining heavy-metal measurements into the same frame
    for metal in ('copper', 'lead', 'zinc'):
        training[metal] = stack.extract_vector(
            response=training_pt, field=metal)[metal]

    regr = RandomForestRegressor(n_estimators=50)
    X = training.loc[:, stack.names]

    # single target regression
    y = training['zinc']
    regr.fit(X, y)
    single_regr = stack.predict(regr)
    self.assertIsInstance(single_regr, Raster)
    self.assertEqual(single_regr.count, 1)

    # multi-target regression
    y = training.loc[:, ['zinc', 'cadmium', 'copper', 'lead']]
    regr.fit(X, y)
    multi_regr = stack.predict(regr)
    self.assertIsInstance(multi_regr, Raster)
    self.assertEqual(multi_regr.count, 4)
def test_extract_points(self):
    """Extract pixel values at point locations as arrays and compare
    against known shapes and summary statistics."""
    stack = Raster(self.predictors)

    # extract training data from points
    training_pt = geopandas.read_file(
        os.path.join(nc_dir, 'landsat96_points.shp'))
    X, y, xy = stack.extract_vector(response=training_pt, field='id',
                                    return_array=True)

    # drop every row that has at least one masked band value
    valid = ~X.mask.any(axis=1)
    X = X[valid]
    y = y[valid]
    xy = xy[valid]

    # shapes of the extracted arrays
    self.assertTupleEqual(X.shape, (562, 6))
    self.assertTupleEqual(y.shape, (562, ))
    self.assertTupleEqual(xy.shape, (562, 2))

    # class counts of the extracted response values
    self.assertTrue(
        np.equal(np.bincount(y),
                 np.asarray([0, 161, 3, 76, 36, 275, 8, 3])).all())

    # per-band means of the extracted predictor values
    expected_means = (81.588968, 67.619217, 67.455516,
                      69.153025, 90.051601, 59.558719)
    for band, expected in enumerate(expected_means):
        self.assertAlmostEqual(X[:, band].mean(), expected, places=2)
def test_regression(self):
    """Fit single- and multi-target random forest regressors on the
    meuse data and predict rasters from both."""
    stack = Raster(ms.predictors)
    training_pt = gpd.read_file(ms.meuse)
    training = stack.extract_vector(
        response=training_pt,
        columns=['cadmium', 'copper', 'lead', 'zinc'])

    regr = RandomForestRegressor(n_estimators=50)
    X = training.loc[:, stack.names]

    # single target regression
    y = training['zinc']
    regr.fit(X, y)
    single_regr = stack.predict(regr)
    self.assertIsInstance(single_regr, Raster)
    self.assertEqual(single_regr.count, 1)

    # multi-target regression: one output band per target column
    y = training.loc[:, ['zinc', 'cadmium', 'copper', 'lead']]
    regr.fit(X, y)
    multi_regr = stack.predict(regr)
    self.assertIsInstance(multi_regr, Raster)
    self.assertEqual(multi_regr.count, 4)
def test_classification(self):
    """Fit a random forest classifier, then predict class labels and
    per-class probabilities as rasters.

    Fixes a copy-paste bug: the isinstance assertion in the class
    probabilities section checked ``cla`` (already verified above)
    instead of ``probs``.
    """
    stack = Raster(self.predictors)
    training_pt = gpd.read_file(nc.points)
    df_points = stack.extract_vector(response=training_pt, columns='id')

    clf = RandomForestClassifier(n_estimators=50)
    X = df_points.drop(columns=['id', 'geometry'])
    y = df_points.id
    clf.fit(X, y)

    # classification
    cla = stack.predict(estimator=clf, dtype='int16', nodata=0)
    self.assertIsInstance(cla, Raster)
    self.assertEqual(cla.count, 1)
    self.assertEqual(cla.read(masked=True).count(), 135092)

    # class probabilities: one band per class
    probs = stack.predict_proba(estimator=clf)
    self.assertIsInstance(probs, Raster)  # was `cla` — wrong variable
    self.assertEqual(probs.count, 7)
    for _, layer in probs:
        self.assertEqual(layer.read(masked=True).count(), 135092)
def test_extract_polygons(self):
    """Extract pixel values within polygons as arrays and verify the
    expected number of unmasked pixels."""
    stack = Raster(self.predictors)

    # extract training data from polygons
    training_py = geopandas.read_file(nc.polygons)
    X, y, xy = stack.extract_vector(
        response=training_py, columns='id', return_array=True)

    # drop every row that has at least one masked band value
    valid = ~X.mask.any(axis=1)
    X = X[valid]
    y = y[valid]
    xy = xy[valid]

    # shapes of the extracted arrays
    self.assertTupleEqual(X.shape, (2436, 6))
    self.assertTupleEqual(y.shape, (2436, ))
    self.assertTupleEqual(xy.shape, (2436, 2))
class TestAlter(TestCase):
    """Tests for Raster.alter using a fitted scikit-learn transformer."""

    def setUp(self) -> None:
        # six-band landsat stack used as the input raster
        self.stack = Raster(
            [nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7])
        sample_points = gpd.read_file(nc.points)
        # extracted pixel values (NaN rows removed) used to fit the scaler
        self.data = self.stack.extract_vector(sample_points).dropna()

    def tearDown(self) -> None:
        self.stack.close()

    def test_alter(self):
        """Altering with a fitted StandardScaler yields a Raster with an
        unchanged shape."""
        scaler = StandardScaler()
        scaler.fit(self.data.drop(columns=["geometry"]).values)
        transformed = self.stack.alter(scaler)
        self.assertIsInstance(transformed, Raster)
        self.assertEqual(transformed.shape, self.stack.shape)
def test_extract_lines(self):
    """Extract pixel values along line geometries as arrays and verify
    the expected number of unmasked pixels."""
    stack = Raster(self.predictors)

    # build line geometries from the polygon boundaries
    training_py = geopandas.read_file(nc.polygons)
    training_lines = deepcopy(training_py)
    training_lines['geometry'] = training_lines.geometry.boundary

    X, y, xy = stack.extract_vector(
        response=training_lines, columns='id', return_array=True)

    # drop every row that has at least one masked band value
    valid = ~X.mask.any(axis=1)
    X = X[valid]
    y = y[valid]
    xy = xy[valid]

    # shapes of the extracted arrays
    self.assertTupleEqual(X.shape, (948, 6))
    self.assertTupleEqual(y.shape, (948, ))
    self.assertTupleEqual(xy.shape, (948, 2))
# Documentation example: load training geometries of each type
# (points, labelled raster pixels, lines derived from polygon
# boundaries). NOTE(review): `training_py`, `predictors` and the
# initial `stack` are defined earlier in this document — confirm
# against the full script.
training_pt = geopandas.read_file(nc.points)
training_px = rasterio.open(os.path.join(nc.labelled_pixels))
training_lines = deepcopy(training_py)
training_lines['geometry'] = training_lines.geometry.boundary

# Plot some training data: band 7 as the backdrop with the training
# points overlaid
plt.imshow(stack.lsat7_2000_70.read(masked=True),
           extent=rasterio.plot.plotting_extent(stack.lsat7_2000_70.ds))
plt.scatter(x=training_pt.bounds.iloc[:, 0],
            y=training_pt.bounds.iloc[:, 1],
            s=2, color='black')
plt.show()

# Create a training dataset by extracting the raster values at the training point locations:
stack = Raster(predictors)
df_points = stack.extract_vector(response=training_pt, field='id')
df_polygons = stack.extract_vector(response=training_py, field='id')
df_lines = stack.extract_vector(response=training_lines, field='id')
df_raster = stack.extract_raster(response=training_px, value_name='id')
df_points.head()

# Next we can train a logistic regression classifier:
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_validate

# define the classifier with standardization of the input features in a pipeline
lr = Pipeline(
    [('scaling', StandardScaler()),
     ('classifier', LogisticRegressionCV(n_jobs=-1))])
class TestExtract(TestCase):
    """Tests for Raster.extract_vector / extract_raster, validated
    against pixel values extracted independently with GRASS GIS."""

    def setUp(self) -> None:
        self.predictors = [
            nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7
        ]
        self.extracted_grass = pd.read_table(nc.extracted_pixels,
                                             delimiter=" ")
        self.stack = Raster(self.predictors)

    def tearDown(self) -> None:
        self.stack.close()

    def test_extract_points(self):
        """Values extracted at points must equal the band attributes
        stored on the point layer itself."""
        training_pt = geopandas.read_file(nc.points)
        df = self.stack.extract_vector(gdf=training_pt).dropna()
        training_pt = training_pt.dropna()
        band_pairs = zip(
            ("lsat7_2000_10", "lsat7_2000_20", "lsat7_2000_30",
             "lsat7_2000_40", "lsat7_2000_50", "lsat7_2000_70"),
            ("b1", "b2", "b3", "b4", "b5", "b7"))
        for raster_col, point_col in band_pairs:
            self.assertTrue(
                (df[raster_col].values ==
                 training_pt[point_col].values).all())

    def test_extract_polygons(self):
        """Band means of pixels extracted within polygons must match the
        GRASS GIS reference extraction."""
        training_py = geopandas.read_file(nc.polygons)
        df = self.stack.extract_vector(gdf=training_py).dropna()
        # attach the polygon attributes back onto the extracted pixels
        df = df.merge(
            right=training_py.loc[:, ("id", "label")],
            left_on="geometry_idx",
            right_on="index",
            right_index=True,
        )
        self.assertEqual(df.shape[0], self.extracted_grass.shape[0])
        band_pairs = zip(
            ("lsat7_2000_10", "lsat7_2000_20", "lsat7_2000_30",
             "lsat7_2000_40", "lsat7_2000_50", "lsat7_2000_70"),
            ("b1", "b2", "b3", "b4", "b5", "b7"))
        for raster_col, grass_col in band_pairs:
            self.assertAlmostEqual(df[raster_col].mean(),
                                   self.extracted_grass[grass_col].mean(),
                                   places=2)

    def test_extract_lines(self):
        """Extraction along polygon boundaries yields the expected
        number of unmasked pixels."""
        training_py = geopandas.read_file(nc.polygons)
        training_lines = deepcopy(training_py)
        training_lines["geometry"] = training_lines.geometry.boundary
        df = self.stack.extract_vector(gdf=training_lines).dropna()
        self.assertEqual(df.shape[0], 948)

    def test_extract_raster(self):
        """Band means of pixels extracted under labelled raster pixels
        must match the GRASS GIS reference extraction."""
        with rasterio.open(nc.labelled_pixels) as src:
            df = self.stack.extract_raster(src)
        df = df.dropna()
        self.assertEqual(df.shape[0], self.extracted_grass.shape[0])
        band_pairs = zip(
            ("lsat7_2000_10", "lsat7_2000_20", "lsat7_2000_30",
             "lsat7_2000_40", "lsat7_2000_50", "lsat7_2000_70"),
            ("b1", "b2", "b3", "b4", "b5", "b7"))
        for raster_col, grass_col in band_pairs:
            self.assertAlmostEqual(df[raster_col].mean(),
                                   self.extracted_grass[grass_col].mean(),
                                   places=3)
# Documentation example: load training geometries of each type
# (points, labelled raster pixels, lines derived from polygon
# boundaries). NOTE(review): `training_py`, `predictors` and the
# initial `stack` are defined earlier in this document — confirm
# against the full script.
training_pt = geopandas.read_file(nc.points)
training_px = rasterio.open(os.path.join(nc.labelled_pixels))
training_lines = deepcopy(training_py)
training_lines['geometry'] = training_lines.geometry.boundary

# Plot some training data: band 7 as the backdrop with the training
# points overlaid
plt.imshow(stack.lsat7_2000_70.read(masked=True),
           extent=rasterio.plot.plotting_extent(stack.lsat7_2000_70.ds))
plt.scatter(x=training_pt.bounds.iloc[:, 0],
            y=training_pt.bounds.iloc[:, 1],
            s=2, color='black')
plt.show()

# Create a training dataset by extracting the raster values at the training point locations:
stack = Raster(predictors)
df_points = stack.extract_vector(response=training_pt, columns='id')
df_polygons = stack.extract_vector(response=training_py, columns='id')
df_lines = stack.extract_vector(response=training_lines, columns='id')
df_raster = stack.extract_raster(response=training_px, value_name='id')
df_points.head()

# Next we can train a logistic regression classifier:
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_validate

# define the classifier with standardization of the input features in a pipeline
lr = Pipeline(
    [('scaling', StandardScaler()),
     ('classifier', LogisticRegressionCV(n_jobs=-1))])
# Notebook cell: render the KNeighbors classification, NDVI and EVI
# rasters on a shared figure, then score NDVI-derived labels against
# the reference response. NOTE(review): `result_neigh`, `extent`,
# `axes`, `manzana`, `raster_ndvi`, `raster_evi`, `training`, `y` and
# `calculate_binary_class_scores` are defined in earlier cells —
# confirm against the full notebook.
ep.plot_bands(result_neigh.read(), extent=extent, ax=axes[1,1],
              cmap='RdYlGn', alpha=0.8,
              title="Clasificación KNeighbors", cbar=False)
manzana.boundary.plot(ax=axes[1,1], color='white', alpha=0.5, linewidth=1)

# Plot ndvi
ep.plot_bands(raster_ndvi.read(), extent=extent, ax=axes[2,0],
              cmap='RdYlGn', alpha=0.8, title="NDVI", cbar=False)
manzana.boundary.plot(ax=axes[2,0], color='white', alpha=0.5, linewidth=1)

# Plot EVI
ep.plot_bands(raster_evi.read(), extent=extent, ax=axes[2,1],
              cmap='RdYlGn', alpha=0.8, title="EVI", cbar=False)
manzana.boundary.plot(ax=axes[2,1], color='white', alpha=0.5, linewidth=1)

plt.tight_layout()
plt.show()

# %%
# Extract raster values at the training geometries; the first extracted
# column is renamed to 'id' so it can be scored against the response
df_ndvi = raster_ndvi.extract_vector(training)
df_ndvi.rename(columns={df_ndvi.columns[0]: 'id'}, inplace=True)

df_evi = raster_evi.extract_vector(training)
df_evi.rename(columns={df_evi.columns[0]: 'id'}, inplace=True)

# %%
# NDVI test-set evaluation
train_accuracy_score2, train_precision_score2, train_recall_score2 = calculate_binary_class_scores(y, df_ndvi['id'].values)
print('NDVI Test Data Accuracy (%) = ', round(train_accuracy_score2*100,2))
print('NDVI Test Data Precision (%) = ', round(train_precision_score2*100,2))
print('NDVI Test Data Recall (%) = ', round(train_recall_score2*100,2))
cm = confusion_matrix(y, df_ndvi['id'].values)