class Singleband(GeoTiffIOTest, BaseTestClass):
    """Compares single-band GeoTiff reads with the same files read via rasterio."""

    dir_path = file_path("one-month-tiles/")

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the gateway held by ``BaseTestClass.pysc`` after the test body."""
        yield
        BaseTestClass.pysc._gateway.close()

    def read_singleband_geotrellis(self, options=None):
        """Read the tiles under ``dir_path`` and collect them on the driver.

        ``max_tile_size`` is 512 when ``options`` is None and 256 for any
        other value; the value of ``options`` is not inspected further.
        """
        tile_size = 512 if options is None else 256
        layer = get(LayerType.SPATIAL, self.dir_path, max_tile_size=tile_size)
        return layer.to_numpy_rdd().values().collect()

    def test_whole_tiles(self):
        """Un-windowed reads must agree cell-for-cell with rasterio."""
        from_geotrellis = self.read_singleband_geotrellis()
        paths = self.get_filepaths(self.dir_path)
        from_rasterio = self.read_geotiff_rasterio(paths, False)

        for gt_tile, rio_tile in zip(from_geotrellis, from_rasterio):
            cells_match = (gt_tile.cells == rio_tile['cells']).all()
            self.assertTrue(cells_match)
            self.assertEqual(gt_tile.no_data_value, rio_tile['no_data_value'])

    def windowed_result_checker(self, windowed_tiles):
        """Assert that a windowed read produced exactly four tiles."""
        self.assertEqual(len(windowed_tiles), 4)

    def test_windowed_tiles(self):
        """Windowed reads must match rasterio once both sides are sorted."""

        def corner_key(cells):
            # Two corner cells serve as the sort key on both sides, so the
            # tiles from the two readers can be paired up by position.
            return (cells[0, 0, 0], cells[0, 255, 255])

        from_geotrellis = self.read_singleband_geotrellis(True)
        geotrellis_sorted = sorted(
            from_geotrellis, key=lambda tile: corner_key(tile.cells))

        paths = self.get_filepaths(self.dir_path)
        from_rasterio = self.read_geotiff_rasterio(paths, True)
        rasterio_sorted = sorted(
            from_rasterio, key=lambda tile: corner_key(tile['cells']))

        self.windowed_result_checker(from_geotrellis)

        for gt_tile, rio_tile in zip(geotrellis_sorted, rasterio_sorted):
            self.assertEqual(gt_tile.cells.shape, rio_tile['cells'].shape)
            cells_match = (gt_tile.cells == rio_tile['cells']).all()
            self.assertTrue(cells_match)
            self.assertEqual(gt_tile.no_data_value, rio_tile['no_data_value'])
class COGTest(BaseTestClass):
    """Reads values, queries and metadata from the ``cog-layer`` catalog layer."""

    dir_path = file_path("catalog/")
    uri = "file://{}".format(dir_path)
    layer_name = "cog-layer"

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the gateway held by ``BaseTestClass.pysc`` after the test body."""
        yield
        BaseTestClass.pysc._gateway.close()

    def test_read_value(self):
        """Reading key (1450, 966) at zoom 11 yields a (1, 256, 256) tile."""
        tile = read_value(self.uri, self.layer_name, 11, 1450, 966)
        expected_shape = (1, 256, 256)
        self.assertEqual(tile.cells.shape, expected_shape)

    def test_bad_read_value(self):
        """Reading key (1450, 2000) at zoom 11 is expected to give None."""
        missing = read_value(self.uri, self.layer_name, 11, 1450, 2000)
        self.assertEqual(missing, None)

    @pytest.mark.skipif('TRAVIS' in os.environ,
                        reason="test_query does not pass on Travis")
    def test_query(self):
        """Querying zoom 11 with the box below returns four records."""
        area = box(74.88280541992188,
                   9.667967675781256,
                   75.05858666503909,
                   10.019530136718743)
        hits = query(self.uri, self.layer_name, 11, area)
        self.assertEqual(hits.count(), 4)

    def test_read_metadata(self):
        """Metadata on a queried layer equals the metadata read directly."""
        from_layer = query(self.uri, self.layer_name, 5).layer_metadata
        from_store = read_layer_metadata(self.uri, self.layer_name, 5)
        self.assertEqual(from_layer.to_dict(), from_store.to_dict())
class PartitionPreservationTest(BaseTestClass):
    """Checks that a partition strategy is carried through chained operations."""

    rdd = get(LayerType.SPATIAL, file_path("srtm_52_11.tif"), max_tile_size=6001)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the gateway held by ``BaseTestClass.pysc`` after the test body."""
        yield
        BaseTestClass.pysc._gateway.close()

    def test_partition_preservation(self):
        """Every derived layer must report the first recorded strategy."""
        observed = []

        def remember(layer):
            # Record the layer's strategy and hand the layer back so the
            # call can wrap the expression that produced it.
            observed.append(layer.get_partition_strategy())
            return layer

        strategy = SpatialPartitionStrategy(16)

        default_tiled = self.rdd.tile_to_layout()
        partitioned = remember(
            self.rdd.tile_to_layout(partition_strategy=strategy))

        summed = remember((default_tiled + partitioned) * 0.75)
        maxed = remember(summed.local_max(default_tiled))
        focal = remember(maxed.focal(Operation.MAX, Square(1)))

        reprojected = remember(
            focal.tile_to_layout(layout=LocalLayout(),
                                 target_crs=3857,
                                 partition_strategy=strategy))

        pyramid = reprojected.pyramid()
        remember(pyramid.levels[pyramid.max_zoom])

        baseline = observed[0]
        self.assertTrue(all(state == baseline for state in observed))
def test_rasterization(self):
    """Rasterize the way features of ``zerns.orc`` and check value range and count.

    Each feature is given a cell value of 4, 3 or 1 depending on its tag
    values; the rasterized result must span (1, 4) and hold one record.
    """
    ways = osm_reader.from_orc(file_path("zerns.orc")).get_way_features_rdd()

    def assign_cellvalues(feature):
        # Pick the cell value from the feature's tag values; the same number
        # is passed for both CellValue arguments.
        tag_values = feature.properties.tags.values()
        if 'water' in tag_values:
            level = 4
        elif "en:Zern's Farmer's Market" in tag_values:
            level = 3
        else:
            level = 1
        return Feature(feature.geometry, CellValue(level, level))

    valued_ways = ways.map(assign_cellvalues)
    rasterized = rasterize_features(valued_ways, 4326, 12,
                                    cell_type=CellType.INT8)

    self.assertEqual(rasterized.get_min_max(), (1, 4))
    self.assertEqual(rasterized.count(), 1)
class CatalogTest(BaseTestClass):
    """Reads ``srtm_52_11.tif`` through the rasterio-backed reader."""

    uri = file_path("srtm_52_11.tif")

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the gateway held by ``BaseTestClass.pysc`` after the test body."""
        yield
        BaseTestClass.pysc._gateway.close()

    @pytest.mark.skipif('TRAVIS' in os.environ,
                        reason="Cannot resolve depency issues in Travis for the time being")
    def test_tiles(self):
        """Reading in 256x256 windows must yield 144 tiles."""
        # Imported inside the test, as in the original, so that collection
        # does not require these modules.
        import geopyspark as gps
        from geopyspark.geotrellis import rasterio

        def fixed_proj4(n):
            # The argument is ignored; the same proj4 string is always used.
            return '+proj=longlat +datum=WGS84 +no_defs '

        windows = rasterio._read_windows(self.uri,
                                         xcols=256,
                                         ycols=256,
                                         bands=None,
                                         crs_to_proj4=fixed_proj4)
        window_count = len(list(windows))
        self.assertEqual(window_count, 144)

    @pytest.mark.skipif('TRAVIS' in os.environ,
                        reason="Cannot resolve depency issues in Travis for the time being")
    def test_layer(self):
        """A RasterLayer built from the rasterio read must hold 144 records."""
        import geopyspark as gps
        from geopyspark.geotrellis import rasterio

        numpy_rdd = gps.rasterio.get(self.uri)
        layer = gps.RasterLayer.from_numpy_rdd(gps.LayerType.SPATIAL, numpy_rdd)
        self.assertEqual(layer.count(), 144)
class CatalogTest(BaseTestClass):
    """Exercises catalog reads, queries, metadata and the attribute store.

    The class-level ``rdd``/``metadata``/``reprojected``/``result`` values are
    computed when the class body is evaluated and are only consumed by
    ``test_read``.
    """

    rdd = get(LayerType.SPATIAL, file_path("srtm_52_11.tif"), max_tile_size=6001)

    metadata = rdd.collect_metadata()
    reprojected = rdd.tile_to_layout(layout=GlobalLayout(zoom=11), target_crs=3857)
    result = reprojected.pyramid()

    dir_path = file_path("catalog/")
    uri = "file://{}".format(dir_path)
    layer_name = "catalog-test"

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the gateway held by ``BaseTestClass.pysc`` after the test body."""
        yield
        BaseTestClass.pysc._gateway.close()

    @pytest.mark.skip
    def test_read(self):
        """Compare catalog layers with the in-memory pyramid for zooms 11..1."""
        for x in range(11, 0, -1):
            actual_layer = query(self.uri, self.layer_name, x).tile_to_layout(
                LocalLayout(), self.metadata.crs)
            expected_layer = self.result.levels[x].tile_to_layout(
                LocalLayout(), self.metadata.crs)

            actual_md = actual_layer.layer_metadata
            expected_md = expected_layer.layer_metadata

            self.assertEqual(actual_md.tile_layout, expected_md.tile_layout)
            self.assertEqual(actual_md.layout_definition,
                             expected_md.layout_definition)
            self.assertEqual(actual_md.bounds, expected_md.bounds)

    def test_read_value(self):
        """Reading key (1450, 966) at zoom 11 yields a (1, 256, 256) tile."""
        tiled = read_value(self.uri, self.layer_name, 11, 1450, 966)
        self.assertEqual(tiled.cells.shape, (1, 256, 256))

    def test_bad_read_value(self):
        """Reading key (1450, 2000) at zoom 11 must give None."""
        tiled = read_value(self.uri, self.layer_name, 11, 1450, 2000)
        self.assertIsNone(tiled)

    @pytest.mark.skipif('TRAVIS' in os.environ,
                        reason="test_query_1 causes issues on Travis")
    def test_query1(self):
        """A shapely box query returns SpatialKey(1450, 996) first."""
        intersection = box(8348915.46680623, 543988.943201519,
                           8348915.4669, 543988.943201520)
        queried = query(self.uri, self.layer_name, 11, intersection)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query2(self):
        """An Extent query with ``query_proj=3857`` returns the same key."""
        intersection = Extent(8348915.46680623, 543988.943201519,
                              8348915.4669, 543988.943201520)
        queried = query(self.uri, self.layer_name, 11, intersection,
                        query_proj=3857)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query3(self):
        """The ``.wkb`` form of the box is accepted as a query geometry."""
        intersection = box(8348915.46680623, 543988.943201519,
                           8348915.4669, 543988.943201520).wkb
        queried = query(self.uri, self.layer_name, 11, intersection)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query4(self):
        """An integer query geometry must raise TypeError.

        Both the query call and the first read sit inside the ``raises``
        block, so a TypeError from either statement satisfies the test.
        """
        intersection = 42

        with pytest.raises(TypeError):
            queried = query(self.uri, self.layer_name, 11,
                            query_geom=intersection, num_partitions=2)
            queried.to_numpy_rdd().first()[0]

    def test_query_partitions(self):
        """Passing ``num_partitions=2`` still returns the expected key."""
        intersection = box(8348915.46680623, 543988.943201519,
                           8348915.4669, 543988.943201520)
        queried = query(self.uri, self.layer_name, 11, intersection,
                        num_partitions=2)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query_crs(self):
        """A box queried with ``query_proj=4326`` returns the same key."""
        intersection = box(74.99958369653905, 4.8808219582513095,
                           74.99958369738141, 4.880821958251324)
        queried = query(self.uri, self.layer_name, 11, intersection,
                        query_proj=4326)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_read_metadata_exception(self):
        """An unrecognised URI must raise ValueError."""
        uri = "abcxyz://123"

        with pytest.raises(ValueError):
            read_layer_metadata(uri, self.layer_name, 5)

    def test_read_metadata1(self):
        """Metadata from a queried layer matches the stored metadata field-wise.

        Previously this test built both objects and asserted nothing, so it
        could only fail by raising. It now compares the same three fields
        that ``test_read`` compares.
        """
        layer = query(self.uri, self.layer_name, 5)
        actual_metadata = layer.layer_metadata
        expected_metadata = read_layer_metadata(self.uri, self.layer_name, 5)

        self.assertEqual(actual_metadata.bounds, expected_metadata.bounds)
        self.assertEqual(actual_metadata.tile_layout,
                         expected_metadata.tile_layout)
        self.assertEqual(actual_metadata.layout_definition,
                         expected_metadata.layout_definition)

    def test_read_metadata2(self):
        """Metadata from a queried layer matches the stored metadata as dicts."""
        layer = query(self.uri, self.layer_name, 5)
        actual_metadata = layer.layer_metadata
        expected_metadata = read_layer_metadata(self.uri, self.layer_name, 5)

        self.assertEqual(actual_metadata.to_dict(), expected_metadata.to_dict())

    def test_layer_ids(self):
        """The attribute store must list 11 layer ids."""
        ids = AttributeStore(self.uri).layers()

        # assertEqual reports the actual count on failure, which
        # assertTrue(len(ids) == 11) did not.
        self.assertEqual(len(ids), 11)

    def test_attributestore(self):
        """Write, read back (two ways), delete, then expect KeyError."""
        store = AttributeStore(self.uri)
        layer_name = "boop-epsg-bop"
        value = {"first": 113, "second": "44two"}

        store.layer(layer_name, 34).write("val", value)
        self.assertEqual(value, store.layer(layer_name, 34).read("val"))
        self.assertEqual(value, store.layer(layer_name, 34)["val"])

        store.layer(layer_name, 34).delete("val")
        with pytest.raises(KeyError):
            store.layer(layer_name, 34)["val"]
class GeoTiffRasterRDDTest(BaseTestClass):
    """Exercises basic RasterLayer operations on the ``all-ones.tif`` fixture."""

    dir_path = file_path("all-ones.tif")
    result = get(LayerType.SPATIAL, dir_path, max_tile_size=256)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the gateway held by ``BaseTestClass.pysc`` after the test body."""
        yield
        BaseTestClass.pysc._gateway.close()

    def test_repartition(self):
        """Repartitioning to 2 must leave the layer with 2 partitions."""
        layer_md = self.result.collect_metadata()
        # NOTE(review): this tiles BaseTestClass.rdd rather than self.result;
        # confirm that is intended.
        tiled = BaseTestClass.rdd.tile_to_layout(layer_md)
        partition_count = tiled.repartition(2).getNumPartitions()
        self.assertEqual(partition_count, 2)

    def test_partitionBy(self):
        """``partitionBy`` must install the strategy it was given."""
        strategy = SpatialPartitionStrategy(2)
        repartitioned = self.result.tile_to_layout().partitionBy(strategy)
        self.assertEqual(repartitioned.get_partition_strategy(), strategy)

    def test_to_numpy_rdd(self, option=None):
        """The first record's tile must have shape (1, 256, 256)."""
        _, first_tile = self.result.to_numpy_rdd().first()
        self.assertEqual(first_tile.cells.shape, (1, 256, 256))

    def test_collect_metadata(self, options=None):
        """Collected metadata must carry a longlat / WGS84 crs string."""
        crs = self.result.collect_metadata().crs
        self.assertTrue('+proj=longlat' in crs)
        self.assertTrue('+datum=WGS84' in crs)

    def test_reproject(self, options=None):
        """Reprojecting to EPSG:3857 must give a mercator crs string."""
        reprojected_md = self.result.reproject("EPSG:3857").collect_metadata()
        self.assertTrue('+proj=merc' in reprojected_md.crs)

    def test_to_ud_ubyte(self):
        """Converting to UINT8 with ``no_data_value=-1`` keeps -1 as no-data."""
        cells = np.array([[0.4324323432124, 0.0, 0.0],
                          [1.0, 1.0, 1.0]], dtype=float)
        float_tile = Tile(cells, 'FLOAT', float('nan'))

        key = ProjectedExtent(Extent(0.0, 0.0, 10.0, 10.0), 3857)
        numpy_rdd = BaseTestClass.pysc.parallelize([(key, float_tile)])
        layer = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, numpy_rdd)

        converted = layer.convert_data_type(CellType.UINT8, no_data_value=-1)
        first_record = converted.to_numpy_rdd().first()

        self.assertEqual(first_record[1].no_data_value, -1)

    def test_no_data_deserialization(self):
        """A SHORT tile made only of its no-data value survives a round trip."""
        # One band of 4x4 cells, every cell set to the no-data value.
        cells = np.full((1, 4, 4), -32768, dtype=np.int16)
        original = Tile(cells, 'SHORT', -32768)

        key = ProjectedExtent(Extent(0.0, 0.0, 10.0, 10.0), 3857)
        numpy_rdd = BaseTestClass.pysc.parallelize([(key, original)])
        layer = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, numpy_rdd)

        round_tripped = layer.to_numpy_rdd().first()[1]

        self.assertEqual(round_tripped.cell_type, original.cell_type)
        self.assertEqual(round_tripped.no_data_value, original.no_data_value)
        self.assertTrue((round_tripped.cells == original.cells).all())
class CatalogTest(BaseTestClass):
    """Exercises catalog reads, queries, metadata and the attribute store."""

    dir_path = file_path("catalog/")
    uri = "file://{}".format(dir_path)
    layer_name = "catalog-test"

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the gateway held by ``BaseTestClass.pysc`` after the test body."""
        yield
        BaseTestClass.pysc._gateway.close()

    def test_read_value(self):
        """Reading key (1450, 966) at zoom 11 yields a (1, 256, 256) tile."""
        tiled = read_value(self.uri, self.layer_name, 11, 1450, 966)
        self.assertEqual(tiled.cells.shape, (1, 256, 256))

    def test_bad_read_value(self):
        """Reading key (1450, 2000) at zoom 11 must give None."""
        tiled = read_value(self.uri, self.layer_name, 11, 1450, 2000)
        self.assertIsNone(tiled)

    @pytest.mark.skipif('TRAVIS' in os.environ,
                        reason="test_query_1 causes issues on Travis")
    def test_query1(self):
        """A shapely box query returns SpatialKey(1450, 996) first."""
        intersection = box(8348915.46680623, 543988.943201519,
                           8348915.4669, 543988.943201520)
        queried = query(self.uri, self.layer_name, 11, intersection)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query2(self):
        """An Extent query with ``query_proj=3857`` returns the same key."""
        intersection = Extent(8348915.46680623, 543988.943201519,
                              8348915.4669, 543988.943201520)
        queried = query(self.uri, self.layer_name, 11, intersection,
                        query_proj=3857)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query3(self):
        """The ``.wkb`` form of the box is accepted as a query geometry."""
        intersection = box(8348915.46680623, 543988.943201519,
                           8348915.4669, 543988.943201520).wkb
        queried = query(self.uri, self.layer_name, 11, intersection)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query4(self):
        """An integer query geometry must raise TypeError.

        Both the query call and the first read sit inside the ``raises``
        block, so a TypeError from either statement satisfies the test.
        """
        intersection = 42

        with pytest.raises(TypeError):
            queried = query(self.uri, self.layer_name, 11,
                            query_geom=intersection, num_partitions=2)
            queried.to_numpy_rdd().first()[0]

    def test_query_partitions(self):
        """Passing ``num_partitions=2`` still returns the expected key."""
        intersection = box(8348915.46680623, 543988.943201519,
                           8348915.4669, 543988.943201520)
        queried = query(self.uri, self.layer_name, 11, intersection,
                        num_partitions=2)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query_crs(self):
        """A box queried with ``query_proj=4326`` returns the same key."""
        intersection = box(74.99958369653905, 4.8808219582513095,
                           74.99958369738141, 4.880821958251324)
        queried = query(self.uri, self.layer_name, 11, intersection,
                        query_proj=4326)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_read_metadata_exception(self):
        """An unrecognised URI must raise ValueError."""
        uri = "abcxyz://123"

        with pytest.raises(ValueError):
            read_layer_metadata(uri, self.layer_name, 5)

    def test_read_metadata1(self):
        """Metadata from a queried layer matches the stored metadata field-wise.

        Previously this test built both objects and asserted nothing, so it
        could only fail by raising.
        """
        layer = query(self.uri, self.layer_name, 5)
        actual_metadata = layer.layer_metadata
        expected_metadata = read_layer_metadata(self.uri, self.layer_name, 5)

        # NOTE(review): assumes the metadata object exposes bounds,
        # tile_layout and layout_definition with value equality - confirm.
        self.assertEqual(actual_metadata.bounds, expected_metadata.bounds)
        self.assertEqual(actual_metadata.tile_layout,
                         expected_metadata.tile_layout)
        self.assertEqual(actual_metadata.layout_definition,
                         expected_metadata.layout_definition)

    def test_read_metadata2(self):
        """Metadata from a queried layer matches the stored metadata as dicts."""
        layer = query(self.uri, self.layer_name, 5)
        actual_metadata = layer.layer_metadata
        expected_metadata = read_layer_metadata(self.uri, self.layer_name, 5)

        self.assertEqual(actual_metadata.to_dict(), expected_metadata.to_dict())

    def test_layer_ids(self):
        """The attribute store must list 12 layer ids."""
        ids = AttributeStore(self.uri).layers()

        # assertEqual reports the actual count on failure, which
        # assertTrue(len(ids) == 12) did not.
        self.assertEqual(len(ids), 12)

    def test_attributestore(self):
        """Write, read back (two ways), delete, then expect KeyError."""
        store = AttributeStore(self.uri)
        layer_name = "boop-epsg-bop"
        value = {"first": 113, "second": "44two"}

        store.layer(layer_name, 34).write("val", value)
        self.assertEqual(value, store.layer(layer_name, 34).read("val"))
        self.assertEqual(value, store.layer(layer_name, 34)["val"])

        store.layer(layer_name, 34).delete("val")
        with pytest.raises(KeyError):
            store.layer(layer_name, 34)["val"]
class ToGeoTiffTest(BaseTestClass):
    """Writes layers out via ``to_geotiff_rdd`` and inspects them with rasterio."""

    dir_path = file_path("all-ones.tif")
    rdd = get(LayerType.SPATIAL, dir_path, max_tile_size=1024)
    metadata = rdd.collect_metadata()

    # Maps a numpy dtype name to the cell type string passed to Tile.
    mapped_types = {
        'int8': 'BYTE',
        'uint8': 'UBYTE',
        'int16': 'SHORT',
        'uint16': 'USHORT',
        'int32': 'INT',
        'float': 'FLOAT',
        'float32': 'FLOAT',
        'double': 'DOUBLE'
    }

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the gateway held by ``BaseTestClass.pysc`` after the test body."""
        yield
        BaseTestClass.pysc._gateway.close()

    def test_to_geotiff_rdd_rasterlayer(self):
        """A tiled, deflate-compressed GeoTiff must report those settings."""
        geotiff_rdd = self.rdd.to_geotiff_rdd(storage_method="Tiled",
                                              compression="DeflateCompression",
                                              color_space=0,
                                              head_tags={'INTERLEAVE': 'BAND'})
        geotiff_bytes = geotiff_rdd.first()[1]

        with tempfile.NamedTemporaryFile() as temp:
            temp.write(geotiff_bytes)
            # rasterio reopens the file by name, so the buffered bytes have
            # to reach disk first; without the flush it can see a truncated
            # file.
            temp.flush()

            with rasterio.open(temp.name) as src:
                self.assertTrue(src.is_tiled)

                profile = src.profile

                self.assertEqual(profile['blockxsize'], 256)
                self.assertEqual(profile['blockysize'], 256)
                self.assertEqual(profile['interleave'], 'band')
                self.assertEqual(src.compression,
                                 rasterio.enums.Compression.deflate)

    def test_to_geotiff_rdd_tiledrasterlayer(self):
        """A default GeoTiff of a tiled layer must round-trip to the same tile."""
        tiled_rdd = self.rdd.tile_to_layout()
        tiled_collected = tiled_rdd.to_numpy_rdd().first()[1]

        geotiff_rdd = tiled_rdd.to_geotiff_rdd()
        geotiff_collected = geotiff_rdd.first()[1]

        def to_geotiff(x):
            """Write GeoTiff bytes ``x`` to a temp file and read them back as a Tile."""
            with tempfile.NamedTemporaryFile() as temp:
                temp.write(x)
                # Flush before reopening by name (see the test above).
                temp.flush()

                with rasterio.open(temp.name) as src:
                    self.assertFalse(src.is_tiled)

                    data = src.read()

                    return Tile(data,
                                self.mapped_types[str(data.dtype)],
                                src.nodata)

        rasterio_geotiff = to_geotiff(geotiff_collected)

        self.assertTrue(
            (tiled_collected.cells == rasterio_geotiff.cells).all())
        self.assertEqual(tiled_collected.cell_type, rasterio_geotiff.cell_type)
        self.assertEqual(tiled_collected.no_data_value,
                         rasterio_geotiff.no_data_value)
class Multiband(S3GeoTiffIOTest, BaseTestClass):
    """Compares multiband GeoTiff reads through a mock S3 client with rasterio."""

    mock_wrapper = BaseTestClass.pysc._gateway.jvm.geopyspark.geotrellis.testkit.MockS3ClientWrapper
    client = mock_wrapper.mockClient()

    key = "one-month-tiles-multiband/result.tif"
    bucket = "test"

    uri = "s3://test/one-month-tiles-multiband/result.tif"
    # Rebinds the name inside the class body: from here on ``file_path`` is
    # the resolved path of the fixture, not the module-level helper.
    file_path = file_path(key)
    options = {"s3Client": "mock"}

    # The context manager closes the handle even if read() raises; the
    # ``in_file`` and ``cells`` names stay bound on the class as before.
    with open(file_path, "rb") as in_file:
        cells = in_file.read()

    @pytest.fixture(scope='class', autouse=True)
    def tearDown(self):
        """Close the gateway held by ``BaseTestClass.pysc`` (class-scoped fixture)."""
        yield
        BaseTestClass.pysc._gateway.close()

    def read_multiband_geotrellis(self, opt=options):
        """Upload the fixture bytes to the mock client and read them back as a layer.

        Args:
            opt: dict with a required ``'s3Client'`` entry and an optional
                ``'maxTileSize'`` entry. The default is the class-level
                ``options`` dict, which is only read here, never mutated.

        Returns:
            The layer returned by ``get`` for ``self.uri``.
        """
        self.client.putObject(self.bucket, self.key, self.cells)
        result = get(LayerType.SPATIAL,
                     self.uri,
                     s3_client=opt['s3Client'],
                     max_tile_size=opt.get('maxTileSize'))

        return result

    def test_segment_tiles(self):
        """The retiled un-windowed read must match the single rasterio tile."""
        # GeoTrellis will read GeoTiff Segments given no window size
        # Retile them to match Rasterio read and check the cell values
        geotrellis_tiles = (self.read_multiband_geotrellis()
                            .tile_to_layout(LocalLayout(512))
                            .to_numpy_rdd()
                            .collect())
        # TODO: assert there is only one geotrellis tile
        geotrellis_tile = dict(geotrellis_tiles)[SpatialKey(0, 0)]

        rasterio_tiles = self.read_geotiff_rasterio([self.file_path], False)
        # assertEquals was a deprecated alias and no longer exists in
        # Python 3.12; assertEqual is the supported spelling.
        self.assertEqual(len(rasterio_tiles), 1)
        rasterio_tile = rasterio_tiles[0]

        self.assertTilesEqual(geotrellis_tile.cells, rasterio_tile['cells'])

    def windowed_result_checker(self, windowed_tiles):
        """Assert that a windowed read produced exactly four tiles."""
        self.assertEqual(len(windowed_tiles), 4)

    def test_windowed_tiles(self):
        """Windowed reads (max tile size 256) must match rasterio after sorting."""
        geotrellis_tiles = self.read_multiband_geotrellis({
            "s3Client": "mock",
            "maxTileSize": 256
        })
        geotrellis_tiles = geotrellis_tiles.to_numpy_rdd().values().collect()

        # Both sides are sorted on their first cell so they can be paired up.
        sorted_1 = sorted(geotrellis_tiles, key=lambda x: x.cells[0, 0, 0])

        rasterio_tiles = self.read_geotiff_rasterio([self.file_path], True)
        sorted_2 = sorted(rasterio_tiles, key=lambda x: x['cells'][0, 0, 0])

        self.windowed_result_checker(geotrellis_tiles)

        for x, y in zip(sorted_1, sorted_2):
            print('\n')
            print('This is read in from geotrellis', x.cells.shape)
            print('This is read in from rasterio', y['cells'].shape)
            self.assertTrue(np.array_equal(x.cells, y['cells']))