def test_regression(self):
    """Fit single- and multi-target regressors and predict over the stack.

    Builds a Raster from ``ms.predictors``, extracts the predictor values at
    the ``ms.meuse`` locations together with four response columns, and
    checks that ``predict`` yields a Raster with one layer per target.
    """
    stack = Raster(ms.predictors)
    meuse_points = gpd.read_file(ms.meuse)
    training = stack.extract_vector(
        response=meuse_points,
        columns=['cadmium', 'copper', 'lead', 'zinc'])

    model = RandomForestRegressor(n_estimators=50)
    features = training.loc[:, stack.names]

    # single target regression
    model.fit(features, training['zinc'])
    single_regr = stack.predict(model)
    self.assertIsInstance(single_regr, Raster)
    self.assertEqual(single_regr.count, 1)

    # multi-target regression
    targets = training.loc[:, ['zinc', 'cadmium', 'copper', 'lead']]
    model.fit(features, targets)
    multi_regr = stack.predict(model)
    self.assertIsInstance(multi_regr, Raster)
    self.assertEqual(multi_regr.count, 4)
def test_extract_points(self):
    """Extract pixel values at point locations and check them.

    Verifies the shapes of the returned arrays, the per-class counts of the
    extracted ``id`` values and the mean of each extracted band.
    """
    stack = Raster(self.predictors)

    # extract training data from points
    points_path = os.path.join(nc_dir, 'landsat96_points.shp')
    training_pt = geopandas.read_file(points_path)
    X, y, xy = stack.extract_vector(
        response=training_pt, field='id', return_array=True)

    # remove masked values: keep rows where no band is masked
    keep = ~X.mask.any(axis=1)
    X, y, xy = X[keep], y[keep], xy[keep]

    # check shapes of extracted pixels
    self.assertTupleEqual(X.shape, (562, 6))
    self.assertTupleEqual(y.shape, (562, ))
    self.assertTupleEqual(xy.shape, (562, 2))

    # check summarized values of extracted y values
    expected_counts = np.asarray([0, 161, 3, 76, 36, 275, 8, 3])
    self.assertTrue(np.equal(np.bincount(y), expected_counts).all())

    # check extracted X values, one expected mean per band column
    expected_means = (
        81.588968, 67.619217, 67.455516, 69.153025, 90.051601, 59.558719,
    )
    for column, expected in enumerate(expected_means):
        self.assertAlmostEqual(X[:, column].mean(), expected, places=2)
class TestToCrs(TestCase):
    """Tests for ``Raster.crop`` using the bounds of the first polygon.

    NOTE(review): the class is named ``TestToCrs`` but every test here
    exercises ``crop``; the name is left unchanged so existing test
    selectors keep working.
    """

    def setUp(self) -> None:
        """Open the predictor stack and read the crop bounds."""
        # inputs
        self.predictors = [
            nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7
        ]
        self.stack = Raster(self.predictors)
        training_py = gpd.read_file(nc.polygons)
        self.crop_bounds = training_py.loc[0, "geometry"].bounds

        # outputs
        self.cropped = None

    def tearDown(self) -> None:
        """Close the rasters opened by the test."""
        self.stack.close()
        # A test that fails before assigning ``self.cropped`` leaves it as
        # None; skip the close so the real failure is not masked by an
        # AttributeError raised here.
        if self.cropped is not None:
            self.cropped.close()

    def test_crop_defaults(self):
        """Crop with default arguments and check counts and value range."""
        self.cropped = self.stack.crop(self.crop_bounds)

        # check raster object
        self.assertIsInstance(self.cropped, Raster)
        self.assertEqual(self.cropped.count, self.stack.count)

        # read the masked data once and reuse it for all value checks
        cropped_values = self.cropped.read(masked=True)
        self.assertEqual(cropped_values.count(), 1440)

        # test nodata value is recognized
        self.assertEqual(cropped_values.min(), 35.0)
        self.assertEqual(cropped_values.max(), 168.0)

    def test_crop_in_memory(self):
        """Crop with ``in_memory=True`` and check a Raster is returned."""
        self.cropped = self.stack.crop(self.crop_bounds, in_memory=True)
        self.assertIsInstance(self.cropped, Raster)
def test_rename_inplace(self):
    """Rename one layer in place and check every access path follows."""
    old_name = "lsat7_2000_30"
    stack = Raster(self.predictors)
    band3_stats = stack.lsat7_2000_30.mean()

    # rename band 3
    stack.rename(names={old_name: "new_name"}, in_place=True)

    # check that renaming occurred in Raster
    current_names = list(stack.names)
    self.assertEqual(current_names[2], "new_name")
    self.assertNotIn(old_name, stack.names)

    # check that Raster layer properties also renamed
    self.assertIn("new_name", dir(stack))
    self.assertNotIn(old_name, dir(stack))

    # check that internal name of RasterLayer was also renamed
    third_layer = stack.iloc[2]
    self.assertEqual(third_layer.name, "new_name")

    # check that the RasterLayer attached to the new name is the same
    self.assertEqual(stack["new_name"].mean(), band3_stats)
    self.assertEqual(stack.new_name.mean(), band3_stats)
    self.assertEqual(stack.iloc[2].mean(), band3_stats)

    # check that a new Raster object derived from the renamed data
    # have the right names
    derived = Raster(src=stack.iloc[2])
    self.assertIn("new_name", derived.names)
def test_initiation_band(self):
    """Initialise a Raster from a single ``rasterio.band`` object."""
    # the dataset stays open while the Raster is built and inspected
    with rasterio.open(nc.band1) as dataset:
        first_band = rasterio.band(dataset, 1)
        self.stack = Raster(first_band)
        self.assertIsInstance(self.stack, Raster)
        self.assertEqual(self.stack.count, 1)
def test_plotting_single(self):
    """Plot a single-layer Raster and check the returned axes type."""
    legend_options = {"orientation": "horizontal", "fraction": 0.04}
    stack = Raster(self.predictors[0])
    p = stack.plot(legend_kwds=legend_options)
    self.assertIsInstance(p, mpl.axes.Subplot)
def test_subset_multiple_layers(self):
    """Subset several layers by slice, index list and label list."""
    stack = Raster(self.predictors + [nc.multiband])

    # Subset multiple layers using a slice of index positions
    # - returns a Raster object
    by_slice = stack.iloc[0:2]
    self.assertIsInstance(by_slice, Raster)

    # Subset multiple layers using a list of index positions
    # - returns a Raster object
    by_positions = stack.iloc[[0, 1, 2]]
    self.assertIsInstance(by_positions, Raster)

    # Subset multiple layers using a list of labels
    # - returns a Raster object
    two_labels = ["lsat7_2000_10", "lsat7_2000_70"]
    subset_raster = stack[two_labels]
    self.assertIsInstance(subset_raster, Raster)
    self.assertListEqual(
        list(subset_raster.names), ["lsat7_2000_10", "lsat7_2000_70"])

    # Check that label and integer subset return the same layers
    first_three = ["lsat7_2000_10", "lsat7_2000_20", "lsat7_2000_30"]
    self.assertListEqual(
        list(stack.iloc[0:3].names),
        list(stack[first_three].names),
    )
    stack.close()
def test_classification(self):
    """Train a classifier on extracted points and predict over the stack.

    Checks both the class prediction (one layer) and the class
    probabilities (seven layers), including the count of unmasked pixels.
    """
    stack = Raster(self.predictors)
    training_pt = gpd.read_file(nc.points)
    df_points = stack.extract_vector(response=training_pt, columns='id')

    clf = RandomForestClassifier(n_estimators=50)
    X = df_points.drop(columns=['id', 'geometry'])
    y = df_points.id
    clf.fit(X, y)

    # classification
    cla = stack.predict(estimator=clf, dtype='int16', nodata=0)
    self.assertIsInstance(cla, Raster)
    self.assertEqual(cla.count, 1)
    self.assertEqual(cla.read(masked=True).count(), 135092)

    # class probabilities
    probs = stack.predict_proba(estimator=clf)
    # the type check must target ``probs``; the original re-checked ``cla``
    self.assertIsInstance(probs, Raster)
    self.assertEqual(probs.count, 7)

    for _, layer in probs:
        self.assertEqual(layer.read(masked=True).count(), 135092)
def test_rename_with_copy(self):
    """Rename one layer with ``in_place=False`` and check the copy.

    The original stack must keep its names, while the returned Raster must
    expose the new name through every access path.
    """
    stack = Raster(self.predictors)
    # snapshot the names so later mutation of the stack cannot hide a change
    names = deepcopy(stack.names)
    band3_stats = stack.lsat7_2000_30.mean()

    # rename band 3
    result = stack.rename(names={"lsat7_2000_30": "new_name"},
                          in_place=False)

    # check that original is untouched
    self.assertEqual(stack.names, names)

    # check that renaming occurred in Raster
    self.assertEqual(result.names[2], "new_name")
    self.assertNotIn("lsat7_2000_30", result.names)

    # check that Raster layer properties also renamed
    self.assertIn("new_name", dir(result))
    self.assertNotIn("lsat7_2000_30", dir(result))

    # check that internal name of RasterLayer was also renamed
    self.assertEqual(result.iloc[2].names[0], "new_name")

    # check that the RasterLayer attached to the new name is the same
    # (the label-lookup assertion was duplicated verbatim; one copy kept)
    self.assertEqual(result["new_name"].mean(), band3_stats)
    self.assertEqual(result.new_name.mean(), band3_stats)
    self.assertEqual(result.iloc[2].mean(), band3_stats)

    # check that a new Raster object derived from the renamed data
    # have the right names
    new_raster = Raster(src=result.iloc[2])
    self.assertIn("new_name", new_raster.names)
class TestPrediction(TestCase):
    """Prediction tests for classification and regression estimators."""

    # Both stacks are created once, when the class body is executed, and are
    # shared by every test method.
    nc_predictors = [
        nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7
    ]
    stack_nc = Raster(nc_predictors)
    stack_meuse = Raster(ms.predictors)

    def test_classification(self):
        """Predict classes and class probabilities over ``stack_nc``."""
        training_pt = gpd.read_file(nc.points)
        df_points = self.stack_nc.extract_vector(gdf=training_pt)
        df_points["class_id"] = training_pt["id"]
        df_points = df_points.dropna()

        clf = RandomForestClassifier(n_estimators=50)
        X = df_points.drop(columns=["id", "class_id", "geometry"])
        y = df_points.class_id
        clf.fit(X, y)

        # classification
        cla = self.stack_nc.predict(estimator=clf, dtype="int16", nodata=0)
        self.assertIsInstance(cla, Raster)
        self.assertEqual(cla.count, 1)
        self.assertEqual(cla.read(masked=True).count(), 135092)

        # class probabilities
        probs = self.stack_nc.predict_proba(estimator=clf)
        # the type check must target ``probs``; the original re-checked
        # ``cla`` here
        self.assertIsInstance(probs, Raster)
        self.assertEqual(probs.count, 7)

        for _, layer in probs:
            self.assertEqual(layer.read(masked=True).count(), 135092)

    def test_regression(self):
        """Predict single- and multi-target regressions over ``stack_meuse``."""
        training_pt = gpd.read_file(ms.meuse)
        training = self.stack_meuse.extract_vector(gdf=training_pt)
        # copy the response columns from the point data onto the extracted
        # frame, in the same order as before
        for target in ("zinc", "cadmium", "copper", "lead"):
            training[target] = training_pt[target]
        training = training.dropna()

        # single target regression
        regr = RandomForestRegressor(n_estimators=50)
        X = training.loc[:, self.stack_meuse.names]
        y = training["zinc"]
        regr.fit(X, y)

        single_regr = self.stack_meuse.predict(regr)
        self.assertIsInstance(single_regr, Raster)
        self.assertEqual(single_regr.count, 1)

        # multi-target regression
        y = training.loc[:, ["zinc", "cadmium", "copper", "lead"]]
        regr.fit(X, y)
        multi_regr = self.stack_meuse.predict(regr)
        self.assertIsInstance(multi_regr, Raster)
        self.assertEqual(multi_regr.count, 4)
def test_initiation_list_datasetreader(self):
    """Initialise a Raster from a list of opened rasterio datasets."""
    # NOTE(review): the datasets opened here are not closed explicitly in
    # this method - confirm that closing ``self.stack`` releases them.
    srcs = [rasterio.open(path) for path in self.predictors]
    self.stack = Raster(srcs)
    self.assertIsInstance(self.stack, Raster)
    self.assertEqual(self.stack.count, 6)
def setUp(self) -> None:
    """Load the predictor paths, the reference pixel table and the stack."""
    band_paths = [
        nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7
    ]
    self.predictors = band_paths
    # space-delimited table of previously extracted pixel values
    self.extracted_grass = pd.read_table(
        nc.extracted_pixels, delimiter=" ")
    self.stack = Raster(self.predictors)
def setUp(self) -> None:
    """Build the stack and extract the point data with missing rows removed."""
    self.stack = Raster([
        nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7
    ])
    extracted = self.stack.extract_vector(gpd.read_file(nc.points))
    # keep only rows without missing values
    self.data = extracted.dropna()
def setUp(self) -> None:
    """Create the input stack and reset the result placeholder."""
    # inputs
    band_paths = [
        nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7,
    ]
    self.predictors = band_paths
    self.stack = Raster(self.predictors)

    # test results
    self.result = None
def test_initiation_list_bands(self):
    """Initialise a Raster from a list of ``rasterio.band`` objects."""
    # band 1 of every predictor file, each opened as its own dataset
    bands = [
        rasterio.band(rasterio.open(path), 1) for path in self.predictors
    ]
    self.stack = Raster(bands)
    self.assertIsInstance(self.stack, Raster)
    self.assertEqual(self.stack.count, 6)
def test_write(self):
    """Write the stack to a GeoTIFF path and check the returned Raster."""
    self.stack = Raster(self.predictors)
    # NOTE(review): only the *name* of the temporary file is kept; the
    # NamedTemporaryFile object itself is discarded straight away, and the
    # written file is not removed by this test.
    out_path = NamedTemporaryFile(suffix=".tif").name
    written = self.stack.write(out_path)
    self.assertIsInstance(written, Raster)
    self.assertEqual(written.count, self.stack.count)
def test_names_subsetting(self) -> None:
    """Check that band names survive subsetting, copying and assignment."""
    raster = Raster(self.fp)

    # positional subset of the first two layers
    first_two = raster.iloc[[0, 1]]
    self.assertEqual(list(first_two.names), self.descriptions[0:2])

    # copy two layers by label, then attach a third one by assignment
    rebuilt = raster.copy(["band_1", "band_2"])
    rebuilt["band_3"] = raster["band_3"]
    self.assertEqual(list(rebuilt.names), self.descriptions[0:3])
def test_initiation_list_rasterlayer(self):
    """Initialise a Raster from a list of RasterLayer objects."""
    # one RasterLayer per predictor file, wrapping band 1 of each dataset
    layers = [
        RasterLayer(rasterio.band(rasterio.open(path), 1))
        for path in self.predictors
    ]
    self.stack = Raster(layers)
    self.assertIsInstance(self.stack, Raster)
    self.assertEqual(self.stack.count, 6)
class TestSample(TestCase):
    """Tests for ``Raster.sample`` with and without a strata raster."""

    def setUp(self) -> None:
        """Open the predictor stack and the strata raster."""
        self.stack = Raster([
            nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7,
        ])
        self.strata = Raster(nc.strata)

    def tearDown(self) -> None:
        """Close both rasters opened in ``setUp``."""
        self.stack.close()
        self.strata.close()

    def test_sample_strata(self):
        """Sample ``size`` points per stratum and check the output shapes."""
        size = 100

        # count the distinct unmasked strata values
        strata_values = self.strata.read(masked=True).flatten()
        strata_values = strata_values[~strata_values.mask]
        n_categories = np.unique(strata_values).shape[0]
        n_samples = size * n_categories

        # extract using a strata raster and returning two arrays
        X, xy = self.stack.sample(
            size=size, strata=self.strata, return_array=True)
        self.assertEqual(X.shape, (n_samples, 6))
        self.assertEqual(xy.shape, (n_samples, 2))

        # extract using a strata raster and returning a dataframe
        samples = self.stack.sample(
            size=size, strata=self.strata, return_array=False)
        self.assertEqual(samples.shape, (n_samples, 7))

    def test_sample_no_strata(self):
        """Sample ``size`` points without strata and check the shapes."""
        size = 100

        # array output
        X, xy = self.stack.sample(size=size, return_array=True)
        self.assertEqual(X.shape, (size, 6))
        self.assertEqual(xy.shape, (size, 2))

        # dataframe output
        samples = self.stack.sample(size=size, return_array=False)
        self.assertEqual(samples.shape, (size, 7))
def setUp(self) -> None:
    """Open the predictor stack and read the crop bounds."""
    # inputs
    band_paths = [
        nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7
    ]
    self.predictors = band_paths
    self.stack = Raster(self.predictors)

    # bounds of the geometry in the row labelled 0 of the polygon file
    polygons = gpd.read_file(nc.polygons)
    first_geometry = polygons.loc[0, "geometry"]
    self.crop_bounds = first_geometry.bounds

    # outputs
    self.cropped = None
def test_drop_inplace(self):
    """Drop one layer in place and check it is gone from the stack."""
    dropped_label = 'lsat7_2000_50'
    stack = Raster(self.predictors)
    stack.drop(labels=dropped_label, in_place=True)

    # check that Raster object is returned
    self.assertIsInstance(stack, Raster)

    # check that RasterLayer has been dropped
    self.assertEqual(stack.count, 5)
    self.assertNotIn(dropped_label, stack.names)
def test_indexing(self):
    """Replace the first layer by position and read it back four ways."""
    stack = Raster(self.predictors + [nc.multiband])

    # replace band 1 with band 7
    band7_mean = stack.loc['lsat7_2000_70'].read(masked=True).mean()
    stack.iloc[0] = Raster(nc.band7).iloc[0]

    # positional access
    by_position = stack.iloc[0].read(masked=True)
    self.assertEqual(by_position.mean(), band7_mean)

    # label access through ``loc``
    by_loc = stack.loc['lsat7_2000_10'].read(masked=True)
    self.assertEqual(by_loc.mean(), band7_mean)

    # label access through item lookup
    by_key = stack['lsat7_2000_10'].read(masked=True)
    self.assertEqual(by_key.mean(), band7_mean)

    # attribute access
    by_attribute = stack.lsat7_2000_10.read(masked=True)
    self.assertEqual(by_attribute.mean(), band7_mean)
def setUp(self) -> None:
    """Prepare the mask polygon, the predictor stack and the result slot."""
    # test inputs
    polygons = gpd.read_file(nc.polygons)
    # first row only, sliced so the result keeps its tabular form
    self.mask_py = polygons.iloc[0:1, :]
    self.stack = Raster([
        nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7
    ])

    # test results
    self.masked_object = None
def rename_in_memory(self):
    """Rename the layers of a 3-band raster using an in-memory aggregate.

    NOTE(review): the method name lacks the ``test_`` prefix, so default
    unittest discovery will not run it - confirm whether that is intended.
    """
    # Create a fake 3-band image for testing
    pixels = np.random.rand(3, 64, 64)
    path = NamedTemporaryFile(prefix="test", suffix=".tif").name

    with rasterio.open(
        path, "w", width=64, height=64, count=3, dtype=np.float32
    ) as dst:
        dst.write(pixels)

    r = Raster(path)
    in_memory = r.aggregate((32, 32), in_memory=True)

    # NOTE(review): the rename is applied to ``r`` using the names of
    # ``in_memory``; presumably ``in_memory.rename`` was meant - verify.
    new_names = ["Red", "Green", "Blue"]
    renamed = r.rename(dict(zip(in_memory.names, new_names)))
    self.assertListEqual(list(renamed.names), ["Red", "Green", "Blue"])
def test_regression(self):
    """Fit single- and multi-target regressors on the meuse rasters.

    Stacks every ``.tif`` in ``meuse_dir``, extracts the four response
    fields at the ``meuse.shp`` locations and checks the layer count of
    each prediction.
    """
    tif_paths = [
        os.path.join(meuse_dir, entry)
        for entry in os.listdir(meuse_dir)
        if entry.endswith('.tif')
    ]
    stack = Raster(tif_paths)
    self.assertEqual(stack.count, 21)

    training_pt = gpd.read_file(os.path.join(meuse_dir, 'meuse.shp'))

    # one extraction per response field; the first provides the base frame
    training = stack.extract_vector(response=training_pt, field='cadmium')
    for metal in ('copper', 'lead', 'zinc'):
        extracted = stack.extract_vector(response=training_pt, field=metal)
        training[metal] = extracted[metal]

    # single target regression
    regr = RandomForestRegressor(n_estimators=50)
    X = training.loc[:, stack.names]
    regr.fit(X, training['zinc'])

    single_regr = stack.predict(regr)
    self.assertIsInstance(single_regr, Raster)
    self.assertEqual(single_regr.count, 1)

    # multi-target regression
    multi_y = training.loc[:, ['zinc', 'cadmium', 'copper', 'lead']]
    regr.fit(X, multi_y)
    multi_regr = stack.predict(regr)
    self.assertIsInstance(multi_regr, Raster)
    self.assertEqual(multi_regr.count, 4)
class TestStats(unittest.TestCase):
    """Tests for the summary statistics of a Raster and of a single layer."""

    def setUp(self) -> None:
        """Open the predictor stack and keep the list of input paths."""
        predictors = [
            nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7
        ]
        self.predictors = predictors
        self.stack = Raster(predictors)

    def test_rasterstats(self):
        """Each stack-level statistic has one non-NaN value per predictor."""
        for stat_name in ("min", "max", "mean", "median"):
            with self.subTest(stat=stat_name):
                values = getattr(self.stack, stat_name)()
                self.assertEqual(len(values), len(self.predictors))
                # The original ``~np.isnan(values).all()`` applied ``~`` to
                # the result of ``.all()``, so it only checked that not
                # *every* value was NaN. Assert that no value is NaN.
                self.assertFalse(np.isnan(values).any())

    def test_layerstats(self):
        """Statistics of the first layer match the known reference values."""
        first_layer = self.stack.iloc[0]
        self.assertEqual(first_layer.min(), 56.0)
        self.assertEqual(first_layer.max(), 255.0)
        self.assertAlmostEqual(first_layer.mean(), 80.6, places=0)
        self.assertEqual(first_layer.median(), 75.0)
def rename_multiband(self):
    """Check default and renamed layer names of a 3-band raster file.

    NOTE(review): the method name lacks the ``test_`` prefix, so default
    unittest discovery will not run it - confirm whether that is intended.
    """
    # Create a fake 3-band image for testing
    pixels = np.random.rand(3, 64, 64)
    path = NamedTemporaryFile(prefix="test", suffix=".tif").name

    # expected default names: file stem followed by the 1-based band index
    stem = os.path.basename(path).split(".")[0]
    expected_names = [stem + "_" + str(band) for band in (1, 2, 3)]

    with rasterio.open(
        path, "w", width=64, height=64, count=3, dtype=np.float32
    ) as dst:
        dst.write(pixels)

    r = Raster(path)
    self.assertListEqual(list(r.names), expected_names)

    colour_names = ["Red", "Green", "Blue"]
    renamed = r.rename(dict(zip(r.names, colour_names)))
    self.assertListEqual(list(renamed.names), ["Red", "Green", "Blue"])
class TestToCrs(TestCase):
    """Tests for ``Raster.to_crs`` reprojecting the stack to EPSG:4326."""

    def setUp(self) -> None:
        """Open the predictor stack and reset the result placeholder."""
        # test inputs
        predictors = [
            nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7
        ]
        self.stack = Raster(predictors)

        # test results
        self.stack_prj = None

    def tearDown(self) -> None:
        """Close the rasters opened by the test."""
        self.stack.close()
        # A test that fails before assigning ``self.stack_prj`` leaves it as
        # None; skip the close so the real failure is not masked by an
        # AttributeError raised here.
        if self.stack_prj is not None:
            self.stack_prj.close()

    def _assert_reprojected_matches_source(self):
        """Check type, layer count, pixel count and range of ``stack_prj``."""
        # check raster object
        self.assertIsInstance(self.stack_prj, Raster)
        self.assertEqual(self.stack_prj.count, self.stack.count)

        # read each raster once and reuse the arrays for all value checks
        projected = self.stack_prj.read(masked=True)
        source = self.stack.read(masked=True)
        self.assertEqual(projected.count(), 1012061)

        # test nodata value is recognized
        self.assertEqual(projected.min(), source.min())
        self.assertEqual(projected.max(), source.max())

    def test_to_crs_defaults(self):
        """Reproject with default arguments."""
        self.stack_prj = self.stack.to_crs({"init": "EPSG:4326"})
        self._assert_reprojected_matches_source()

    def test_to_crs_custom_nodata(self):
        """Reproject with an explicit ``nodata`` value of -999."""
        self.stack_prj = self.stack.to_crs({"init": "EPSG:4326"}, nodata=-999)
        self._assert_reprojected_matches_source()

    def test_to_crs_in_memory(self):
        """Reproject with ``in_memory=True`` and check a Raster is returned."""
        self.stack_prj = self.stack.to_crs({"init": "EPSG:4326"},
                                           in_memory=True)

        # check raster object
        self.assertIsInstance(self.stack_prj, Raster)
def test_drop_with_copy(self):
    """Drop one layer with ``in_place=False`` and check the original is kept."""
    stack = Raster(self.predictors)
    # Snapshot the names into a new list. Keeping a bare reference to
    # ``stack.names`` could alias the live object, in which case the
    # "original unaffected" comparison below could never fail.
    names = list(stack.names)
    result = stack.drop(labels='lsat7_2000_50', in_place=False)

    # check that Raster object is returned
    self.assertIsInstance(result, Raster)

    # check that RasterLayer has been dropped
    self.assertEqual(result.count, 5)
    self.assertNotIn('lsat7_2000_50', result.names)

    # check that original raster is unaffected
    self.assertEqual(stack.count, 6)
    self.assertEqual(list(stack.names), names)
def test_names_from_rasterio(self) -> None:
    """Check layer names when a Raster is built from an open rasterio dataset.

    The names are compared with ``self.descriptions``, the band descriptions
    expected for the file at ``self.fp``.
    """
    # the dataset stays open while the Raster is built and inspected
    with rasterio.open(self.fp) as dataset:
        raster = Raster(dataset)
        layer_names = list(raster.names)
        self.assertEqual(layer_names, self.descriptions)