示例#1
0
class Singleband(GeoTiffIOTest, BaseTestClass):
    """Compare single-band tiles read through ``get`` with rasterio reads."""

    dir_path = file_path("one-month-tiles/")

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Yield to the test, then close ``BaseTestClass.pysc._gateway``."""
        yield
        BaseTestClass.pysc._gateway.close()

    def read_singleband_geotrellis(self, options=None):
        """Read ``dir_path`` as a spatial layer and collect its tiles.

        ``max_tile_size`` is 512 when ``options`` is None and 256 for any
        other value; the content of ``options`` is not otherwise inspected.
        """
        tile_size = 512 if options is None else 256
        layer = get(LayerType.SPATIAL, self.dir_path, max_tile_size=tile_size)
        return layer.to_numpy_rdd().values().collect()

    def test_whole_tiles(self):
        """Pairwise-compare cells and no-data values of both tile lists."""
        from_geotrellis = self.read_singleband_geotrellis()

        paths = self.get_filepaths(self.dir_path)
        from_rasterio = self.read_geotiff_rasterio(paths, False)

        for actual, expected in zip(from_geotrellis, from_rasterio):
            self.assertTrue((actual.cells == expected['cells']).all())
            self.assertEqual(actual.no_data_value, expected['no_data_value'])

    def windowed_result_checker(self, windowed_tiles):
        """Assert that exactly four tiles were produced."""
        self.assertEqual(len(windowed_tiles), 4)

    def test_windowed_tiles(self):
        """Compare the 256-sized reads after ordering both lists alike."""

        # Both sides are ordered by the same pair of cell values so that
        # zip() lines up matching tiles.
        def geotrellis_key(tile):
            return (tile.cells[0, 0, 0], tile.cells[0, 255, 255])

        def rasterio_key(tile):
            return (tile['cells'][0, 0, 0], tile['cells'][0, 255, 255])

        from_geotrellis = self.read_singleband_geotrellis(True)
        ordered_geotrellis = sorted(from_geotrellis, key=geotrellis_key)

        paths = self.get_filepaths(self.dir_path)
        from_rasterio = self.read_geotiff_rasterio(paths, True)
        ordered_rasterio = sorted(from_rasterio, key=rasterio_key)

        self.windowed_result_checker(from_geotrellis)

        for actual, expected in zip(ordered_geotrellis, ordered_rasterio):
            self.assertEqual(actual.cells.shape, expected['cells'].shape)
            self.assertTrue((actual.cells == expected['cells']).all())
            self.assertEqual(actual.no_data_value, expected['no_data_value'])
示例#2
0
class COGTest(BaseTestClass):
    """Read and query tests against the "cog-layer" layer under ``catalog/``."""

    dir_path = file_path("catalog/")
    uri = "file://{}".format(dir_path)
    layer_name = "cog-layer"

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Yield to the test, then close ``BaseTestClass.pysc._gateway``."""
        yield
        BaseTestClass.pysc._gateway.close()

    def test_read_value(self):
        """Reading key (1450, 966) at zoom 11 yields a (1, 256, 256) tile."""
        tiled = read_value(self.uri,
                           self.layer_name,
                           11,
                           1450,
                           966)

        self.assertEqual(tiled.cells.shape, (1, 256, 256))

    def test_bad_read_value(self):
        """Reading key (1450, 2000) at zoom 11 yields None."""
        tiled = read_value(self.uri,
                           self.layer_name,
                           11,
                           1450,
                           2000)

        # Identity check: an object that merely compares equal to None
        # must not satisfy this test.
        self.assertIsNone(tiled)

    @pytest.mark.skipif('TRAVIS' in os.environ,
                        reason="test_query does not pass on Travis")
    def test_query(self):
        """Querying zoom 11 with the box below returns four records."""
        intersection = box(74.88280541992188, 9.667967675781256, 75.05858666503909, 10.019530136718743)
        queried = query(self.uri, self.layer_name, 11, intersection)
        result = queried.count()

        self.assertEqual(result, 4)

    def test_read_metadata(self):
        """Metadata of a queried zoom-5 layer equals ``read_layer_metadata``."""
        layer = query(self.uri, self.layer_name, 5)
        actual_metadata = layer.layer_metadata

        expected_metadata = read_layer_metadata(self.uri, self.layer_name, 5)

        self.assertEqual(actual_metadata.to_dict(), expected_metadata.to_dict())
class PartitionPreservationTest(BaseTestClass):
    """Check that one partition strategy is reported along a layer pipeline."""

    rdd = get(LayerType.SPATIAL,
              file_path("srtm_52_11.tif"),
              max_tile_size=6001)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Yield to the test, then close ``BaseTestClass.pysc._gateway``."""
        yield
        BaseTestClass.pysc._gateway.close()

    def test_partition_preservation(self):
        """Record the strategy after each step and require them all equal."""
        strategy = SpatialPartitionStrategy(16)

        default_tiled = self.rdd.tile_to_layout()

        partitioned = self.rdd.tile_to_layout(partition_strategy=strategy)
        observed = [partitioned.get_partition_strategy()]

        # Local arithmetic between the two layers.
        combined = (default_tiled + partitioned) * 0.75
        observed.append(combined.get_partition_strategy())

        maxed = combined.local_max(default_tiled)
        observed.append(maxed.get_partition_strategy())

        focal = maxed.focal(Operation.MAX, Square(1))
        observed.append(focal.get_partition_strategy())

        reprojected = focal.tile_to_layout(layout=LocalLayout(),
                                           target_crs=3857,
                                           partition_strategy=strategy)
        observed.append(reprojected.get_partition_strategy())

        pyramid = reprojected.pyramid()
        top_level = pyramid.levels[pyramid.max_zoom]
        observed.append(top_level.get_partition_strategy())

        # Every recorded strategy is compared against the first one.
        reference = observed[0]
        self.assertTrue(all(state == reference for state in observed))
示例#4
0
    def test_rasterization(self):
        """Rasterize way features from ``zerns.orc`` and check the result.

        Each feature gets a cell value chosen from its tag values; the
        rasterized layer must span values 1..4 and contain one record.
        """
        osm_features = osm_reader.from_orc(file_path("zerns.orc"))
        ways = osm_features.get_way_features_rdd()

        def assign_cellvalues(feature):
            # Pick the value first, then build the Feature once; the same
            # number is passed for both CellValue arguments.
            tag_values = feature.properties.tags.values()

            if 'water' in tag_values:
                value = 4
            elif "en:Zern's Farmer's Market" in tag_values:
                value = 3
            else:
                value = 1

            return Feature(feature.geometry, CellValue(value, value))

        valued_ways = ways.map(assign_cellvalues)

        rasterized = rasterize_features(valued_ways,
                                        4326,
                                        12,
                                        cell_type=CellType.INT8)

        self.assertEqual(rasterized.get_min_max(), (1, 4))
        self.assertEqual(rasterized.count(), 1)
示例#5
0
class CatalogTest(BaseTestClass):
    """Tests for the reader in ``geopyspark.geotrellis.rasterio``."""

    uri = file_path("srtm_52_11.tif")

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Yield to the test, then close ``BaseTestClass.pysc._gateway``."""
        yield
        BaseTestClass.pysc._gateway.close()

    @pytest.mark.skipif(
        'TRAVIS' in os.environ,
        reason="Cannot resolve depency issues in Travis for the time being")
    def test_tiles(self):
        """Reading the file in 256x256 windows yields 144 items."""
        import geopyspark as gps
        from geopyspark.geotrellis import rasterio

        def fixed_proj4(n):
            # The argument is ignored; the same proj4 string is always used.
            return '+proj=longlat +datum=WGS84 +no_defs '

        windows = rasterio._read_windows(self.uri,
                                         xcols=256,
                                         ycols=256,
                                         bands=None,
                                         crs_to_proj4=fixed_proj4)
        window_count = len(list(windows))
        self.assertEqual(window_count, 144)

    @pytest.mark.skipif(
        'TRAVIS' in os.environ,
        reason="Cannot resolve depency issues in Travis for the time being")
    def test_layer(self):
        """A RasterLayer built from the rasterio read holds 144 records."""
        import geopyspark as gps
        from geopyspark.geotrellis import rasterio

        numpy_rdd = gps.rasterio.get(self.uri)
        layer = gps.RasterLayer.from_numpy_rdd(gps.LayerType.SPATIAL,
                                               numpy_rdd)
        self.assertEqual(layer.count(), 144)
示例#6
0
class CatalogTest(BaseTestClass):
    """Read, query and attribute-store tests for the "catalog-test" layer."""

    # Everything below runs once, when the class body is evaluated, and is
    # shared by all tests in the class.
    rdd = get(LayerType.SPATIAL,
              file_path("srtm_52_11.tif"),
              max_tile_size=6001)

    metadata = rdd.collect_metadata()
    reprojected = rdd.tile_to_layout(layout=GlobalLayout(zoom=11),
                                     target_crs=3857)
    result = reprojected.pyramid()

    dir_path = file_path("catalog/")
    uri = "file://{}".format(dir_path)
    layer_name = "catalog-test"

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Yield to the test, then close ``BaseTestClass.pysc._gateway``."""
        yield
        BaseTestClass.pysc._gateway.close()

    @pytest.mark.skip
    def test_read(self):
        """Compare stored levels 11..1 with the in-memory pyramid (skipped)."""
        for x in range(11, 0, -1):
            actual_layer = query(self.uri, self.layer_name,
                                 x).tile_to_layout(LocalLayout(),
                                                   self.metadata.crs)
            expected_layer = self.result.levels[x].tile_to_layout(
                LocalLayout(), self.metadata.crs)

            actual_md = actual_layer.layer_metadata
            expected_md = expected_layer.layer_metadata

            self.assertEqual(actual_md.tile_layout, expected_md.tile_layout)
            self.assertEqual(actual_md.layout_definition,
                             expected_md.layout_definition)
            self.assertEqual(actual_md.bounds, expected_md.bounds)

    def test_read_value(self):
        """Reading key (1450, 966) at zoom 11 yields a (1, 256, 256) tile."""
        tiled = read_value(self.uri, self.layer_name, 11, 1450, 966)

        self.assertEqual(tiled.cells.shape, (1, 256, 256))

    def test_bad_read_value(self):
        """Reading key (1450, 2000) at zoom 11 yields None."""
        tiled = read_value(self.uri, self.layer_name, 11, 1450, 2000)

        # Identity check: an object that merely compares equal to None
        # must not satisfy this test.
        self.assertIsNone(tiled)

    @pytest.mark.skipif('TRAVIS' in os.environ,
                        reason="test_query_1 causes issues on Travis")
    def test_query1(self):
        """Query with a shapely box and no explicit projection."""
        intersection = box(8348915.46680623, 543988.943201519, 8348915.4669,
                           543988.943201520)
        queried = query(self.uri, self.layer_name, 11, intersection)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query2(self):
        """Query with an ``Extent`` and ``query_proj=3857``."""
        intersection = Extent(8348915.46680623, 543988.943201519, 8348915.4669,
                              543988.943201520)
        queried = query(self.uri,
                        self.layer_name,
                        11,
                        intersection,
                        query_proj=3857)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query3(self):
        """Query with the WKB bytes of a shapely box."""
        intersection = box(8348915.46680623, 543988.943201519, 8348915.4669,
                           543988.943201520).wkb
        queried = query(self.uri, self.layer_name, 11, intersection)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query4(self):
        """An integer passed as ``query_geom`` must raise ``TypeError``."""
        intersection = 42
        with pytest.raises(TypeError):
            queried = query(self.uri,
                            self.layer_name,
                            11,
                            query_geom=intersection,
                            num_partitions=2)
            # Kept inside the block so a TypeError raised while the result
            # is read also counts; the value itself is not needed.
            queried.to_numpy_rdd().first()[0]

    def test_query_partitions(self):
        """Query with a shapely box and ``num_partitions=2``."""
        intersection = box(8348915.46680623, 543988.943201519, 8348915.4669,
                           543988.943201520)
        queried = query(self.uri,
                        self.layer_name,
                        11,
                        intersection,
                        num_partitions=2)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query_crs(self):
        """Query with a box given alongside ``query_proj=4326``."""
        intersection = box(74.99958369653905, 4.8808219582513095,
                           74.99958369738141, 4.880821958251324)
        queried = query(self.uri,
                        self.layer_name,
                        11,
                        intersection,
                        query_proj=4326)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_read_metadata_exception(self):
        """A URI with the scheme ``abcxyz`` must raise ``ValueError``."""
        uri = "abcxyz://123"
        with pytest.raises(ValueError):
            read_layer_metadata(uri, self.layer_name, 5)

    def test_read_metadata1(self):
        """Smoke test: both metadata reads complete; nothing is asserted."""
        layer = query(self.uri, self.layer_name, 5)
        actual_metadata = layer.layer_metadata

        expected_metadata = read_layer_metadata(self.uri, self.layer_name, 5)

    def test_read_metadata2(self):
        """Metadata of a queried zoom-5 layer equals ``read_layer_metadata``."""
        layer = query(self.uri, self.layer_name, 5)
        actual_metadata = layer.layer_metadata

        expected_metadata = read_layer_metadata(self.uri, self.layer_name, 5)

        self.assertEqual(actual_metadata.to_dict(),
                         expected_metadata.to_dict())

    def test_layer_ids(self):
        """The attribute store lists eleven layer ids."""
        ids = AttributeStore(self.uri).layers()
        # assertEqual reports the actual count on failure, unlike
        # assertTrue(len(ids) == 11), which only reports "False is not true".
        self.assertEqual(len(ids), 11)

    def test_attributestore(self):
        """Write, read back (two ways) and delete an attribute value."""
        store = AttributeStore(self.uri)
        layer_name = "boop-epsg-bop"
        value = {"first": 113, "second": "44two"}
        store.layer(layer_name, 34).write("val", value)
        self.assertEqual(value, store.layer(layer_name, 34).read("val"))

        self.assertEqual(value, store.layer(layer_name, 34)["val"])
        store.layer(layer_name, 34).delete("val")
        # After deletion, item access must raise KeyError.
        with pytest.raises(KeyError):
            store.layer(layer_name, 34)["val"]
class GeoTiffRasterRDDTest(BaseTestClass):
    """Tests for a raster layer read from ``all-ones.tif`` in 256-sized tiles."""

    dir_path = file_path("all-ones.tif")
    result = get(LayerType.SPATIAL, dir_path, max_tile_size=256)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Yield to the test, then close ``BaseTestClass.pysc._gateway``."""
        yield
        BaseTestClass.pysc._gateway.close()

    def test_repartition(self):
        """Repartitioning a tiled layer to 2 yields two partitions."""
        md = self.result.collect_metadata()
        # NOTE(review): this tiles BaseTestClass.rdd, not self.result; only
        # the metadata comes from self.result. Confirm that is intended.
        laid_out_rdd = BaseTestClass.rdd.tile_to_layout(md)
        repartitioned = laid_out_rdd.repartition(2)
        self.assertEqual(repartitioned.getNumPartitions(), 2)

    def test_partitionBy(self):
        """``partitionBy`` reports the strategy it was given."""
        tiled = self.result.tile_to_layout()

        strategy = SpatialPartitionStrategy(2)
        repartitioned = tiled.partitionBy(strategy)

        self.assertEqual(repartitioned.get_partition_strategy(), strategy)

    def test_to_numpy_rdd(self, option=None):
        """The first tile of the numpy RDD has shape (1, 256, 256)."""
        pyrdd = self.result.to_numpy_rdd()
        (key, tile) = pyrdd.first()
        self.assertEqual(tile.cells.shape, (1, 256, 256))

    def test_collect_metadata(self, options=None):
        """The collected CRS mentions longlat projection and WGS84 datum."""
        md = self.result.collect_metadata()
        # assertIn shows both operands when the check fails.
        self.assertIn('+proj=longlat', md.crs)
        self.assertIn('+datum=WGS84', md.crs)

    def test_reproject(self, options=None):
        """After reprojecting to EPSG:3857 the CRS mentions ``+proj=merc``."""
        tiles = self.result.reproject("EPSG:3857")
        md = tiles.collect_metadata()
        self.assertIn('+proj=merc', md.crs)

    def test_to_ud_ubyte(self):
        """Converting to UINT8 with ``no_data_value=-1`` keeps that value."""
        arr = np.array([[0.4324323432124, 0.0, 0.0], [1.0, 1.0, 1.0]],
                       dtype=float)

        epsg_code = 3857
        extent = Extent(0.0, 0.0, 10.0, 10.0)
        projected_extent = ProjectedExtent(extent, epsg_code)

        tile = Tile(arr, 'FLOAT', float('nan'))
        rdd = BaseTestClass.pysc.parallelize([(projected_extent, tile)])
        raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)

        converted = raster_rdd.convert_data_type(CellType.UINT8,
                                                 no_data_value=-1)
        # first() returns a (key, tile) pair; index 1 is the tile.
        tile = converted.to_numpy_rdd().first()
        no_data = tile[1].no_data_value

        self.assertEqual(no_data, -1)

    def test_no_data_deserialization(self):
        """A SHORT tile filled with -32768 round-trips through a layer."""
        arr = np.int16([[[-32768, -32768, -32768, -32768],
                         [-32768, -32768, -32768, -32768],
                         [-32768, -32768, -32768, -32768],
                         [-32768, -32768, -32768, -32768]]])

        epsg_code = 3857
        extent = Extent(0.0, 0.0, 10.0, 10.0)
        projected_extent = ProjectedExtent(extent, epsg_code)

        tile = Tile(arr, 'SHORT', -32768)
        rdd = BaseTestClass.pysc.parallelize([(projected_extent, tile)])
        raster_layer = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)

        actual_tile = raster_layer.to_numpy_rdd().first()[1]

        self.assertEqual(actual_tile.cell_type, tile.cell_type)
        self.assertEqual(actual_tile.no_data_value, tile.no_data_value)
        self.assertTrue((actual_tile.cells == tile.cells).all())
示例#8
0
class CatalogTest(BaseTestClass):
    """Read, query and attribute-store tests for the "catalog-test" layer."""

    dir_path = file_path("catalog/")
    uri = "file://{}".format(dir_path)
    layer_name = "catalog-test"

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Yield to the test, then close ``BaseTestClass.pysc._gateway``."""
        yield
        BaseTestClass.pysc._gateway.close()

    def test_read_value(self):
        """Reading key (1450, 966) at zoom 11 yields a (1, 256, 256) tile."""
        tiled = read_value(self.uri, self.layer_name, 11, 1450, 966)

        self.assertEqual(tiled.cells.shape, (1, 256, 256))

    def test_bad_read_value(self):
        """Reading key (1450, 2000) at zoom 11 yields None."""
        tiled = read_value(self.uri, self.layer_name, 11, 1450, 2000)

        # Identity check: an object that merely compares equal to None
        # must not satisfy this test.
        self.assertIsNone(tiled)

    @pytest.mark.skipif('TRAVIS' in os.environ,
                        reason="test_query_1 causes issues on Travis")
    def test_query1(self):
        """Query with a shapely box and no explicit projection."""
        intersection = box(8348915.46680623, 543988.943201519, 8348915.4669,
                           543988.943201520)
        queried = query(self.uri, self.layer_name, 11, intersection)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query2(self):
        """Query with an ``Extent`` and ``query_proj=3857``."""
        intersection = Extent(8348915.46680623, 543988.943201519, 8348915.4669,
                              543988.943201520)
        queried = query(self.uri,
                        self.layer_name,
                        11,
                        intersection,
                        query_proj=3857)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query3(self):
        """Query with the WKB bytes of a shapely box."""
        intersection = box(8348915.46680623, 543988.943201519, 8348915.4669,
                           543988.943201520).wkb
        queried = query(self.uri, self.layer_name, 11, intersection)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query4(self):
        """An integer passed as ``query_geom`` must raise ``TypeError``."""
        intersection = 42
        with pytest.raises(TypeError):
            queried = query(self.uri,
                            self.layer_name,
                            11,
                            query_geom=intersection,
                            num_partitions=2)
            # Kept inside the block so a TypeError raised while the result
            # is read also counts; the value itself is not needed.
            queried.to_numpy_rdd().first()[0]

    def test_query_partitions(self):
        """Query with a shapely box and ``num_partitions=2``."""
        intersection = box(8348915.46680623, 543988.943201519, 8348915.4669,
                           543988.943201520)
        queried = query(self.uri,
                        self.layer_name,
                        11,
                        intersection,
                        num_partitions=2)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_query_crs(self):
        """Query with a box given alongside ``query_proj=4326``."""
        intersection = box(74.99958369653905, 4.8808219582513095,
                           74.99958369738141, 4.880821958251324)
        queried = query(self.uri,
                        self.layer_name,
                        11,
                        intersection,
                        query_proj=4326)

        self.assertEqual(queried.to_numpy_rdd().first()[0],
                         SpatialKey(1450, 996))

    def test_read_metadata_exception(self):
        """A URI with the scheme ``abcxyz`` must raise ``ValueError``."""
        uri = "abcxyz://123"
        with pytest.raises(ValueError):
            read_layer_metadata(uri, self.layer_name, 5)

    def test_read_metadata1(self):
        """Smoke test: both metadata reads complete; nothing is asserted."""
        layer = query(self.uri, self.layer_name, 5)
        actual_metadata = layer.layer_metadata

        expected_metadata = read_layer_metadata(self.uri, self.layer_name, 5)

    def test_read_metadata2(self):
        """Metadata of a queried zoom-5 layer equals ``read_layer_metadata``."""
        layer = query(self.uri, self.layer_name, 5)
        actual_metadata = layer.layer_metadata

        expected_metadata = read_layer_metadata(self.uri, self.layer_name, 5)

        self.assertEqual(actual_metadata.to_dict(),
                         expected_metadata.to_dict())

    def test_layer_ids(self):
        """The attribute store lists twelve layer ids."""
        ids = AttributeStore(self.uri).layers()
        # assertEqual reports the actual count on failure, unlike
        # assertTrue(len(ids) == 12), which only reports "False is not true".
        self.assertEqual(len(ids), 12)

    def test_attributestore(self):
        """Write, read back (two ways) and delete an attribute value."""
        store = AttributeStore(self.uri)
        layer_name = "boop-epsg-bop"
        value = {"first": 113, "second": "44two"}
        store.layer(layer_name, 34).write("val", value)
        self.assertEqual(value, store.layer(layer_name, 34).read("val"))

        self.assertEqual(value, store.layer(layer_name, 34)["val"])
        store.layer(layer_name, 34).delete("val")
        # After deletion, item access must raise KeyError.
        with pytest.raises(KeyError):
            store.layer(layer_name, 34)["val"]
示例#9
0
class ToGeoTiffTest(BaseTestClass):
    """Check GeoTiff bytes from ``to_geotiff_rdd`` by reopening with rasterio."""

    dir_path = file_path("all-ones.tif")
    rdd = get(LayerType.SPATIAL, dir_path, max_tile_size=1024)
    metadata = rdd.collect_metadata()

    # Maps str(numpy dtype) of the data read by rasterio to the cell-type
    # name passed to Tile.
    mapped_types = {
        'int8': 'BYTE',
        'uint8': 'UBYTE',
        'int16': 'SHORT',
        'uint16': 'USHORT',
        'int32': 'INT',
        'float': 'FLOAT',
        'float32': 'FLOAT',
        'double': 'DOUBLE'
    }

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Yield to the test, then close ``BaseTestClass.pysc._gateway``."""
        yield
        BaseTestClass.pysc._gateway.close()

    def test_to_geotiff_rdd_rasterlayer(self):
        """Tiled, deflate-compressed, band-interleaved output is honoured."""
        geotiff_rdd = self.rdd.to_geotiff_rdd(storage_method="Tiled",
                                              compression="DeflateCompression",
                                              color_space=0,
                                              head_tags={'INTERLEAVE': 'BAND'})

        geotiff_bytes = geotiff_rdd.first()[1]

        with tempfile.NamedTemporaryFile() as temp:
            temp.write(geotiff_bytes)
            # The file is reopened by name below, so buffered bytes must be
            # pushed to disk first or rasterio may see a truncated file.
            temp.flush()
            temp_path = pathlib.Path(temp.name)

            with rasterio.open(str(temp_path)) as src:
                self.assertTrue(src.is_tiled)

                profile = src.profile

                self.assertEqual(profile['blockxsize'], 256)
                self.assertEqual(profile['blockysize'], 256)
                self.assertEqual(profile['interleave'], 'band')
                self.assertEqual(src.compression,
                                 rasterio.enums.Compression.deflate)

    def test_to_geotiff_rdd_tiledrasterlayer(self):
        """Default GeoTiff output of a tiled layer matches the source tile."""
        tiled_rdd = self.rdd.tile_to_layout()
        tiled_collected = tiled_rdd.to_numpy_rdd().first()[1]

        geotiff_rdd = tiled_rdd.to_geotiff_rdd()
        geotiff_collected = geotiff_rdd.first()[1]

        def to_geotiff(x):
            # Write the bytes to a temp file, reopen it with rasterio and
            # wrap what was read in a Tile.
            with tempfile.NamedTemporaryFile() as temp:
                temp.write(x)
                # Flush before reopening by name; see the note in
                # test_to_geotiff_rdd_rasterlayer.
                temp.flush()
                temp_path = pathlib.Path(temp.name)

                with rasterio.open(str(temp_path)) as src:
                    self.assertFalse(src.is_tiled)
                    data = src.read()
                    return Tile(data, self.mapped_types[str(data.dtype)],
                                src.nodata)

        rasterio_geotiff = to_geotiff(geotiff_collected)

        self.assertTrue(
            (tiled_collected.cells == rasterio_geotiff.cells).all())
        self.assertEqual(tiled_collected.cell_type, rasterio_geotiff.cell_type)
        self.assertEqual(tiled_collected.no_data_value,
                         rasterio_geotiff.no_data_value)
示例#10
0
class Multiband(S3GeoTiffIOTest, BaseTestClass):
    """Compare a multiband GeoTiff read via a mock S3 client with rasterio."""

    mock_wrapper = BaseTestClass.pysc._gateway.jvm.geopyspark.geotrellis.testkit.MockS3ClientWrapper
    client = mock_wrapper.mockClient()

    key = "one-month-tiles-multiband/result.tif"
    bucket = "test"

    uri = "s3://test/one-month-tiles-multiband/result.tif"
    # From here on, ``file_path`` in the class scope is the resolved path
    # string, not the module-level helper of the same name.
    file_path = file_path(key)
    options = {"s3Client": "mock"}

    # Raw file bytes, uploaded to the mock client before each read. The
    # with-block closes the handle even if read() raises.
    with open(file_path, "rb") as in_file:
        cells = in_file.read()

    @pytest.fixture(scope='class', autouse=True)
    def tearDown(self):
        """Yield, then close ``BaseTestClass.pysc._gateway``."""
        yield
        BaseTestClass.pysc._gateway.close()

    def read_multiband_geotrellis(self, opt=options):
        """Upload the file to the mock client and read it back as a layer.

        ``opt`` must contain ``'s3Client'``; ``'maxTileSize'`` is optional
        and passed on as ``max_tile_size`` (None when absent). The default
        dict is shared between calls and is only read here.
        """
        self.client.putObject(self.bucket, self.key, self.cells)
        result = get(LayerType.SPATIAL,
                     self.uri,
                     s3_client=opt['s3Client'],
                     max_tile_size=opt.get('maxTileSize'))

        return result

    def test_segment_tiles(self):
        """Retile to a 512 local layout and compare tile (0, 0) to rasterio."""
        # GeoTrellis will read GeoTiff Segments given no window size
        # Retile them to match Rasterio read and check the cell values
        geotrellis_tiles = self.read_multiband_geotrellis()\
                               .tile_to_layout(LocalLayout(512))\
                               .to_numpy_rdd().collect()
        # TODO: assert there is only one geotrellis tile
        geotrellis_tile = dict(geotrellis_tiles)[SpatialKey(0, 0)]

        rasterio_tiles = self.read_geotiff_rasterio([self.file_path], False)
        # assertEquals is a deprecated alias that no longer exists on
        # Python 3.12; assertEqual is the supported spelling.
        self.assertEqual(len(rasterio_tiles), 1)
        rasterio_tile = rasterio_tiles[0]

        self.assertTilesEqual(geotrellis_tile.cells, rasterio_tile['cells'])

    def windowed_result_checker(self, windowed_tiles):
        """Assert that exactly four tiles were produced."""
        self.assertEqual(len(windowed_tiles), 4)

    def test_windowed_tiles(self):
        """Compare the 256-sized reads after sorting both by the first cell."""
        geotrellis_tiles = self.read_multiband_geotrellis({
            "s3Client": "mock",
            "maxTileSize": 256
        })
        geotrellis_tiles = geotrellis_tiles.to_numpy_rdd().values().collect()
        sorted_1 = sorted(geotrellis_tiles, key=lambda x: x.cells[0, 0, 0])

        rasterio_tiles = self.read_geotiff_rasterio([self.file_path], True)
        sorted_2 = sorted(rasterio_tiles, key=lambda x: x['cells'][0, 0, 0])

        self.windowed_result_checker(geotrellis_tiles)

        for x, y in zip(sorted_1, sorted_2):
            print('\n')
            print('This is read in from geotrellis', x.cells.shape)
            print('This is read in from rasterio', y['cells'].shape)
            self.assertTrue(np.array_equal(x.cells, y['cells']))