# Example #1
# 0
def _create_spacetime_layer(cells: np.ndarray = None) -> TiledRasterLayer:
    # TODO all these "create_spacetime_layer" functions are duplicated across all tests
    #       and better should be moved to some kind of general factory or test fixture
    assert len(cells.shape) == 4
    tile = Tile.from_numpy_array(cells, -1)

    layer = [(SpaceTimeKey(0, 0, now), tile), (SpaceTimeKey(1, 0, now), tile),
             (SpaceTimeKey(0, 1, now), tile), (SpaceTimeKey(1, 1, now), tile)]

    rdd = SparkContext.getOrCreate().parallelize(layer)

    metadata = {
        'cellType': 'int32ud-1',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0,
                'instant': _convert_to_unix_time(now)
            },
            'maxKey': {
                'col': 1,
                'row': 1,
                'instant': _convert_to_unix_time(now)
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': layout
        }
    }

    return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)
# Example #2
# 0
def imagecollection_with_two_bands_and_three_dates_webmerc(request):
    """Build a two-band, three-date data cube in Web Mercator projection.

    When the requesting test runs on a class instance, the cube is also
    attached to it as ``imagecollection_with_two_bands_and_three_dates``.
    """
    from geopyspark.geotrellis import (SpaceTimeKey, Tile, _convert_to_unix_time)
    from geopyspark.geotrellis.constants import LayerType
    from geopyspark.geotrellis.layer import TiledRasterLayer
    import geopyspark as gps

    from openeogeotrellis.geopysparkdatacube import GeopysparkDataCube, GeopysparkCubeMetadata

    first_date, last_date, rdd = numpy_rdd_two_bands_and_three_dates()

    web_mercator_proj4 = '+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +no_defs '
    layer_metadata = {
        'cellType': 'int32ud-1',
        'extent': extent_webmerc,
        'crs': web_mercator_proj4,
        'bounds': {
            'minKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(first_date)},
            'maxKey': {'col': 1, 'row': 1, 'instant': _convert_to_unix_time(last_date)},
        },
        'layoutDefinition': {'extent': extent_webmerc, 'tileLayout': layout},
    }

    tiled_layer = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, layer_metadata)
    datacube = GeopysparkDataCube(
        pyramid=gps.Pyramid({0: tiled_layer}),
        metadata=GeopysparkCubeMetadata(openeo_metadata),
    )
    if request.instance:
        request.instance.imagecollection_with_two_bands_and_three_dates = datacube
    return datacube
# Example #3
# 0
    def create_spacetime_layer(self):
        """Assemble a 2x2-tile SPACETIME layer where every key holds the same
        two-band tile built from ``self.first`` and ``self.second``."""
        cells = np.array([self.first, self.second], dtype='int')
        tile = Tile.from_numpy_array(cells, -1)

        # Same tile at every grid position, all at self.now.
        keyed_tiles = [(SpaceTimeKey(col, row, self.now), tile)
                       for row in (0, 1) for col in (0, 1)]

        rdd = SparkContext.getOrCreate().parallelize(keyed_tiles)

        metadata = {
            'cellType': 'int32ud-1',
            'extent': self.extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(self.now)},
                'maxKey': {'col': 1, 'row': 1, 'instant': _convert_to_unix_time(self.now)},
            },
            'layoutDefinition': {'extent': self.extent, 'tileLayout': self.layout},
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)
# Example #4
# 0
    def _single_pixel_layer(self, grid_value_by_datetime, no_data=-1.0):
        """Create a 1x1-tile, 1x1-pixel SPACETIME layer with one value per instant.

        ``grid_value_by_datetime`` maps datetimes to single cell values; the
        entries are processed in chronological order and their first/last
        instants bound the layer's temporal extent.
        """
        timestamps_and_values = sorted(grid_value_by_datetime.items())

        # NOTE: each RDD element is itself a single-item list, mirroring the
        # original construction (one list per timestamp).
        layer = [
            [(SpaceTimeKey(0, 0, timestamp), self._single_pixel_tile(value, no_data))]
            for timestamp, value in timestamps_and_values
        ]
        rdd = SparkContext.getOrCreate().parallelize(layer)

        first_instant = timestamps_and_values[0][0]
        last_instant = timestamps_and_values[-1][0]

        unit_extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 1.0, 'ymax': 1.0}
        single_tile_layout = {
            'layoutCols': 1,
            'layoutRows': 1,
            'tileCols': 1,
            'tileRows': 1,
        }
        metadata = {
            'cellType': 'float32ud%f' % no_data,
            'extent': unit_extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(first_instant)},
                'maxKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(last_instant)},
            },
            'layoutDefinition': {'extent': unit_extent, 'tileLayout': single_tile_layout},
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)
# Example #5
# 0
    def _create_spacetime_layer(self, no_data):
        """Build a 2x2-tile float layer (4x4 cells per tile) at ``self.now``.

        The four tiles are uniformly filled with 0, 1, 2 and ``no_data``.
        """
        def constant_tile(value):
            # One 4x4 tile uniformly filled with `value`.
            return Tile.from_numpy_array(np.full((4, 4), value, dtype=float), no_data)

        tiles = [(SpaceTimeKey(0, 0, self.now), constant_tile(0)),
                 (SpaceTimeKey(1, 0, self.now), constant_tile(1)),
                 (SpaceTimeKey(0, 1, self.now), constant_tile(2)),
                 (SpaceTimeKey(1, 1, self.now), constant_tile(no_data))]

        # Debug output kept from the original implementation.
        for keyed_tile in tiles:
            print(keyed_tile)

        layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 4, 'tileRows': 4}
        extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 8.0, 'ymax': 8.0}

        rdd = SparkContext.getOrCreate().parallelize(tiles)
        print(rdd.count())

        metadata = {
            'cellType': 'float64ud-1',
            'extent': extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(self.now)},
                'maxKey': {'col': 1, 'row': 1, 'instant': _convert_to_unix_time(self.now)},
            },
            'layoutDefinition': {'extent': extent, 'tileLayout': layout},
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)
# Example #6
# 0
def to_pb_temporal_projected_extent(obj):
    """Converts an instance of ``TemporalProjectedExtent`` to ``ProtoTemporalProjectedExtent``.

    Args:
        obj (:class:`~geopyspark.geotrellis.TemporalProjectedExtent`): An instance of
            ``TemporalProjectedExtent``.

    Returns:
        ProtoTemporalProjectedExtent
    """

    proto_crs = extentMessages_pb2.ProtoCRS()
    # Prefer the EPSG code when present; otherwise fall back to the proj4 string.
    if obj.epsg:
        proto_crs.epsg = obj.epsg
    else:
        proto_crs.proj4 = obj.proj4

    result = extentMessages_pb2.ProtoTemporalProjectedExtent()
    result.extent.CopyFrom(to_pb_extent(obj.extent))
    result.crs.CopyFrom(proto_crs)
    result.instant = _convert_to_unix_time(obj.instant)

    return result
# Example #7
# 0
def imagecollection_with_two_bands_and_one_date(request):
    """Build a two-band, single-date data cube; when the requesting test runs
    on a class instance, also attach the cube to that instance."""
    import geopyspark as gps
    from geopyspark.geotrellis import (SpaceTimeKey, Tile, _convert_to_unix_time)
    from geopyspark.geotrellis.constants import LayerType
    from geopyspark.geotrellis.layer import TiledRasterLayer
    from pyspark import SparkContext

    from openeogeotrellis.geopysparkdatacube import GeopysparkDataCube

    print(request)
    two_band_cells = np.array([matrix_of_one, matrix_of_two], dtype='int')
    tile = Tile.from_numpy_array(two_band_cells, -1)

    the_date = datetime.datetime.strptime("2017-09-25T11:37:00Z", '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=pytz.UTC)

    # Same tile at every grid position, all at the single instant.
    keyed_tiles = [(SpaceTimeKey(col, row, the_date), tile)
                   for row in (0, 1) for col in (0, 1)]

    rdd = SparkContext.getOrCreate().parallelize(keyed_tiles)

    layer_metadata = {
        'cellType': 'int32ud-1',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(the_date)},
            'maxKey': {'col': 1, 'row': 1, 'instant': _convert_to_unix_time(the_date)},
        },
        'layoutDefinition': {'extent': extent, 'tileLayout': layout},
    }

    geopyspark_layer = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, layer_metadata)

    datacube = GeopysparkDataCube(pyramid=gps.Pyramid({0: geopyspark_layer}), metadata=openeo_metadata)

    if request.instance:
        request.instance.imagecollection_with_two_bands_and_one_date = datacube
    return datacube
# Example #8
# 0
    def create_spacetime_unsigned_byte_layer(self):
        """
        Returns a single-band uint8ud255 layer consisting of four tiles that each look like this:

         ND 220 220 220
        220 220 220 220
        220 220 220 220
        220 220 220 220

        The extent is (0.0, 0.0) to (4.0, 4.0).
        """
        no_data = 255

        # One 4x4 band of 220s with the top-left cell flagged as no-data.
        band = np.full((1, 4, 4), 220)
        band[0, 0, 0] = no_data

        tile = Tile.from_numpy_array(np.array([band], dtype='uint8'), no_data)

        # Identical tile at each of the four grid positions, all at self.now.
        keyed_tiles = [(SpaceTimeKey(col, row, self.now), tile)
                       for row in (0, 1) for col in (0, 1)]

        rdd = SparkContext.getOrCreate().parallelize(keyed_tiles)

        metadata = {
            'cellType': 'uint8ud255',
            'extent': self.extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(self.now)},
                'maxKey': {'col': 1, 'row': 1, 'instant': _convert_to_unix_time(self.now)},
            },
            'layoutDefinition': {'extent': self.extent, 'tileLayout': self.layout},
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)
# Example #9
# 0
def to_pb_space_time_key(obj):
    """Converts an instance of ``SpaceTimeKey`` to ``ProtoSpaceTimeKey``.

    Args:
        obj (:obj:`~geopyspark.geotrellis.SpaceTimeKey`): An instance of ``SpaceTimeKey``.

    Returns:
        ProtoSpaceTimeKey
    """

    proto_key = keyMessages_pb2.ProtoSpaceTimeKey()

    # Copy the spatial grid position and convert the instant to unix time.
    proto_key.col = obj.col
    proto_key.row = obj.row
    proto_key.instant = _convert_to_unix_time(obj.instant)

    return proto_key
# Example #10
# 0
class TestMultipleDates(TestCase):
    """Tests for temporal operations (reduce, aggregate, mask, kernel,
    resample, UDFs) on a small 2x2-tile raster layer with three dates.

    Fixes over the original: the deprecated ``assertEquals`` alias (removed
    in Python 3.12) is replaced by ``assertEqual``, and locals no longer
    shadow the ``input`` builtin.
    """

    # 5x5 single-band tiles; -1.0 marks no-data cells.
    band1 = np.array([[-1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0]])

    band2 = np.array([[2.0, 2.0, 2.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                      [2.0, 2.0, -1.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                      [2.0, 2.0, 2.0, 2.0, 2.0]])

    tile = Tile.from_numpy_array(band1, no_data_value=-1.0)
    tile2 = Tile.from_numpy_array(band2, no_data_value=-1.0)
    # Three observation instants spanning 2016-2017 (naive datetimes).
    time_1 = datetime.datetime.strptime("2016-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_2 = datetime.datetime.strptime("2017-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_3 = datetime.datetime.strptime("2017-10-17T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')

    # 2x2 spatial grid x 3 instants = 12 keyed tiles.
    layer = [(SpaceTimeKey(0, 0, time_1), tile),
             (SpaceTimeKey(1, 0, time_1), tile2),
             (SpaceTimeKey(0, 1, time_1), tile),
             (SpaceTimeKey(1, 1, time_1), tile),
             (SpaceTimeKey(0, 0, time_2), tile2),
             (SpaceTimeKey(1, 0, time_2), tile2),
             (SpaceTimeKey(0, 1, time_2), tile2),
             (SpaceTimeKey(1, 1, time_2), tile2),
             (SpaceTimeKey(0, 0, time_3), tile),
             (SpaceTimeKey(1, 0, time_3), tile2),
             (SpaceTimeKey(0, 1, time_3), tile),
             (SpaceTimeKey(1, 1, time_3), tile)]

    rdd = SparkContext.getOrCreate().parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0,
                'instant': _convert_to_unix_time(time_1)
            },
            'maxKey': {
                'col': 1,
                'row': 1,
                'instant': _convert_to_unix_time(time_3)
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {
                'tileCols': 5,
                'tileRows': 5,
                'layoutCols': 2,
                'layoutRows': 2
            }
        }
    }

    tiled_raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME,
                                                       rdd, metadata)

    # The same tiles keyed by (extent, instant) instead of grid position.
    layer2 = [(TemporalProjectedExtent(Extent(0, 0, 1, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_3), tile)]

    rdd2 = SparkContext.getOrCreate().parallelize(layer2)
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd2)

    # Sample points for timeseries queries (some deliberately outside the extent).
    points = [
        Point(1.0, -3.0),
        Point(0.5, 0.5),
        Point(20.0, 3.0),
        Point(1.0, -2.0),
        Point(-10.0, 15.0)
    ]

    def setUp(self):
        # TODO: make this reusable (or a pytest fixture)
        self.temp_folder = Path.cwd() / 'tmp'
        if not self.temp_folder.exists():
            self.temp_folder.mkdir()
        assert self.temp_folder.is_dir()

    def test_reproject_spatial(self):
        """Reprojecting to EPSG:3857 should yield a mercator CRS and a
        downloadable GeoTIFF."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            pyramid, InMemoryServiceRegistry())

        resampled = imagecollection.resample_spatial(resolution=0,
                                                     projection="EPSG:3857",
                                                     method="max")
        metadata = resampled.pyramid.levels[0].layer_metadata
        print(metadata)
        self.assertTrue("proj=merc" in metadata.crs)
        path = str(self.temp_folder / "reprojected.tiff")
        resampled.reduce('max',
                         'temporal').download(path,
                                              format="GTIFF",
                                              parameters={'tiled': True})

        import rasterio
        with rasterio.open(path) as ds:
            print(ds.profile)

    def test_reduce(self):
        """Temporal reducers over the three dates at two reference pixels."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            pyramid, InMemoryServiceRegistry())

        stitched = imagecollection.reduce(
            "max", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(2.0, stitched.cells[0][0][1])

        stitched = imagecollection.reduce(
            "min", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(1.0, stitched.cells[0][0][1])

        stitched = imagecollection.reduce(
            "sum", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(4.0, stitched.cells[0][0][1])

        stitched = imagecollection.reduce(
            "mean", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(1.3333333, stitched.cells[0][0][1])

        stitched = imagecollection.reduce(
            "variance", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(0.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(0.2222222, stitched.cells[0][0][1])

        stitched = imagecollection.reduce(
            "sd", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(0.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(0.4714045, stitched.cells[0][0][1])

    def test_reduce_all_data(self):
        """Reducers over a 2-date single-pixel layer where no value is no-data."""
        pyramid = Pyramid({
            0:
            self._single_pixel_layer({
                datetime.datetime.strptime("2016-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                1.0,
                datetime.datetime.strptime("2017-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                5.0
            })
        })

        imagecollection = GeotrellisTimeSeriesImageCollection(
            pyramid, InMemoryServiceRegistry())

        stitched = imagecollection.reduce(
            "min", "temporal").pyramid.levels[0].stitch()
        self.assertEqual(1.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "max", "temporal").pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "sum", "temporal").pyramid.levels[0].stitch()
        self.assertEqual(6.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "mean", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(3.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "variance", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(4.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "sd", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(2.0, stitched.cells[0][0][0], delta=0.001)

    def test_reduce_some_nodata(self):
        """No-data observations must be ignored by all temporal reducers."""
        no_data = -1.0

        pyramid = Pyramid({
            0:
            self._single_pixel_layer(
                {
                    datetime.datetime.strptime("2016-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                    no_data,
                    datetime.datetime.strptime("2017-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                    5.0
                }, no_data)
        })

        imagecollection = GeotrellisTimeSeriesImageCollection(
            pyramid, InMemoryServiceRegistry())

        stitched = imagecollection.reduce(
            "min", "temporal").pyramid.levels[0].stitch()
        #print(stitched)
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "max", "temporal").pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "sum", "temporal").pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "mean", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(5.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "variance", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(0.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "sd", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(0.0, stitched.cells[0][0][0], delta=0.001)

    def test_reduce_tiles(self):
        # Smoke test: numpy reference computation for the reducers above.
        print("======")
        tile1 = self._single_pixel_tile(1)
        tile2 = self._single_pixel_tile(5)

        cube = np.array([tile1.cells, tile2.cells])

        # "MIN", "MAX", "SUM", "MEAN", "VARIANCE"

        std = np.std(cube, axis=0)
        var = np.var(cube, axis=0)
        print(var)

    @staticmethod
    def _single_pixel_tile(value, no_data=-1.0):
        """Return a 1x1 Tile holding `value` with the given no-data marker."""
        cells = np.array([[value]])
        return Tile.from_numpy_array(cells, no_data)

    def _single_pixel_layer(self, grid_value_by_datetime, no_data=-1.0):
        """Create a 1x1-tile, 1x1-pixel SPACETIME layer with one value per
        instant; the sorted datetimes bound the layer's temporal extent."""
        from collections import OrderedDict

        sorted_by_datetime = OrderedDict(sorted(
            grid_value_by_datetime.items()))

        def elem(timestamp, value):
            tile = self._single_pixel_tile(value, no_data)
            return [(SpaceTimeKey(0, 0, timestamp), tile)]

        # Each RDD element is a single-item list (one per timestamp).
        layer = [
            elem(timestamp, value)
            for timestamp, value in sorted_by_datetime.items()
        ]
        rdd = SparkContext.getOrCreate().parallelize(layer)

        datetimes = list(sorted_by_datetime.keys())

        extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 1.0, 'ymax': 1.0}
        layout = {
            'layoutCols': 1,
            'layoutRows': 1,
            'tileCols': 1,
            'tileRows': 1
        }
        metadata = {
            'cellType': 'float32ud%f' % no_data,
            'extent': extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {
                    'col': 0,
                    'row': 0,
                    'instant': _convert_to_unix_time(datetimes[0])
                },
                'maxKey': {
                    'col': 0,
                    'row': 0,
                    'instant': _convert_to_unix_time(datetimes[-1])
                }
            },
            'layoutDefinition': {
                'extent': extent,
                'tileLayout': layout
            }
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd,
                                               metadata)

    def test_reduce_nontemporal(self):
        """Reducing over an unsupported dimension must raise AttributeError."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            pyramid, InMemoryServiceRegistry())
        with self.assertRaises(AttributeError) as context:
            imagecollection.reduce("max",
                                   "spectral").pyramid.levels[0].stitch()
        print(context.exception)

    def test_aggregate_temporal(self):
        """Aggregate the three dates into a single labelled interval."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            pyramid, InMemoryServiceRegistry())
        stitched = imagecollection.aggregate_temporal(
            ["2017-01-01", "2018-01-01"], ["2017-01-03"],
            "max").pyramid.levels[0].to_spatial_layer().stitch()
        print(stitched)

    def test_max_aggregator(self):
        tiles = [self.tile, self.tile2]
        composite = max_composite(tiles)
        self.assertEqual(2.0, composite.cells[0][0])

    def test_aggregate_max_time(self):

        pyramid = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            pyramid, InMemoryServiceRegistry())

        stitched = imagecollection.reduce(
            'max', 'temporal').pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])

    def test_min_time(self):
        """Min/max temporal reduction, also checked through point timeseries."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            pyramid, InMemoryServiceRegistry())
        min_time = imagecollection.reduce('min', 'temporal')
        max_time = imagecollection.reduce('max', 'temporal')

        stitched = min_time.pyramid.levels[0].stitch()
        print(stitched)

        self.assertEqual(2.0, stitched.cells[0][0][0])

        for p in self.points[1:3]:
            result = min_time.timeseries(p.x, p.y, srs="EPSG:3857")
            print(result)
            print(imagecollection.timeseries(p.x, p.y, srs="EPSG:3857"))
            max_result = max_time.timeseries(p.x, p.y, srs="EPSG:3857")
            self.assertEqual(1.0, result['NoDate'])
            self.assertEqual(2.0, max_result['NoDate'])

    def test_apply_spatiotemporal(self):
        """Run an openeo_udf sample UDF (sum over time) across the cube."""
        import openeo_udf.functions

        pyramid = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            pyramid, InMemoryServiceRegistry(), {
                "bands": [{
                    "band_id": "2",
                    "name": "blue",
                    "wavelength_nm": 496.6,
                    "res_m": 10,
                    "scale": 0.0001,
                    "offset": 0,
                    "type": "int16",
                    "unit": "1"
                }]
            })
        import os, openeo_udf
        dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(dir, "datacube_reduce_time_sum.py")
        with open(file_name, "r") as f:
            udf_code = f.read()

        result = imagecollection.apply_tiles_spatiotemporal(udf_code)
        stitched = result.pyramid.levels[0].to_spatial_layer().stitch()
        print(stitched)
        self.assertEqual(2, stitched.cells[0][0][0])
        self.assertEqual(6, stitched.cells[0][0][5])
        self.assertEqual(4, stitched.cells[0][5][6])

    def test_apply_dimension_spatiotemporal(self):
        """A pass-through UDF must keep every tile numerically unchanged."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            pyramid, InMemoryServiceRegistry(), {
                "bands": [{
                    "band_id": "2",
                    "name": "blue",
                    "wavelength_nm": 496.6,
                    "res_m": 10,
                    "scale": 0.0001,
                    "offset": 0,
                    "type": "int16",
                    "unit": "1"
                }]
            })

        udf_code = """
def rct_savitzky_golay(udf_data:UdfData):
    from scipy.signal import savgol_filter

    print(udf_data.get_datacube_list())
    return udf_data
        
        """

        result = imagecollection.apply_tiles_spatiotemporal(udf_code)
        local_tiles = result.pyramid.levels[0].to_numpy_rdd().collect()
        print(local_tiles)
        self.assertEqual(len(TestMultipleDates.layer), len(local_tiles))
        ref_dict = {
            e[0]: e[1]
            for e in imagecollection.pyramid.levels[0].convert_data_type(
                CellType.FLOAT64).to_numpy_rdd().collect()
        }
        result_dict = {e[0]: e[1] for e in local_tiles}
        for k, v in ref_dict.items():
            tile = result_dict[k]
            assert_array_almost_equal(np.squeeze(v.cells),
                                      np.squeeze(tile.cells),
                                      decimal=2)

    def test_mask_raster(self):
        """Masked-out cells must be replaced by the given replacement value."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})

        def createMask(tile):
            # Zero out the first cell: a zero mask value masks that pixel.
            tile.cells[0][0][0] = 0.0
            return tile

        mask_layer = self.tiled_raster_rdd.map_tiles(createMask)
        mask = Pyramid({0: mask_layer})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            pyramid, InMemoryServiceRegistry())
        stitched = imagecollection.mask(
            rastermask=GeotrellisTimeSeriesImageCollection(
                mask, InMemoryServiceRegistry()),
            replacement=10.0).reduce('max',
                                     'temporal').pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(10.0, stitched.cells[0][0][1])

    def test_apply_kernel(self):
        """A 3x3 cross kernel with factor 2 applied before a max-reduce."""
        kernel = np.array([[0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0]])

        pyramid = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeotrellisTimeSeriesImageCollection(
            pyramid, InMemoryServiceRegistry())
        stitched = imagecollection.apply_kernel(kernel, 2.0).reduce(
            'max', 'temporal').pyramid.levels[0].stitch()

        self.assertEqual(12.0, stitched.cells[0][0][0])
        self.assertEqual(16.0, stitched.cells[0][0][1])
        self.assertEqual(20.0, stitched.cells[0][1][1])

    def test_resample_spatial(self):
        """Resampling to 0.05 degrees must be reflected in the written GeoTIFF."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            pyramid, InMemoryServiceRegistry())

        resampled = imagecollection.resample_spatial(resolution=0.05)

        path = str(self.temp_folder / "resampled.tiff")
        resampled.reduce('max',
                         'temporal').download(path,
                                              format="GTIFF",
                                              parameters={'tiled': True})

        import rasterio
        with rasterio.open(path) as ds:
            print(ds.profile)
            self.assertAlmostEqual(0.05, ds.res[0], 3)
# Example #11
# 0
class TestMultipleDates(TestCase):
    """Tests for temporal operations on a small spacetime layer.

    The class body builds, at import time, a single-band SPACETIME
    TiledRasterLayer with three instants and a 2x2 grid of 5x5-pixel tiles,
    plus an untiled RasterLayer variant (layer2/raster_rdd) in EPSG:3857.
    """

    # 5x5 band of 1.0 with a single nodata (-1.0) cell at [0][0].
    band1 = np.array([[-1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0]])

    # 5x5 band of 2.0 with a single nodata (-1.0) cell at [2][2].
    band2 = np.array([[2.0, 2.0, 2.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                      [2.0, 2.0, -1.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                      [2.0, 2.0, 2.0, 2.0, 2.0]])

    tile = Tile.from_numpy_array(band1, no_data_value=-1.0)
    tile2 = Tile.from_numpy_array(band2, no_data_value=-1.0)
    # Three observation instants: one in 2016, two in 2017.
    time_1 = datetime.datetime.strptime("2016-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_2 = datetime.datetime.strptime("2017-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_3 = datetime.datetime.strptime("2017-10-17T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')

    # One 2x2 grid of tiles per instant. time_2 is all tile2; time_1 and
    # time_3 use tile2 only at grid position (1, 0).
    layer = [(SpaceTimeKey(0, 0, time_1), tile),
             (SpaceTimeKey(1, 0, time_1), tile2),
             (SpaceTimeKey(0, 1, time_1), tile),
             (SpaceTimeKey(1, 1, time_1), tile),
             (SpaceTimeKey(0, 0, time_2), tile2),
             (SpaceTimeKey(1, 0, time_2), tile2),
             (SpaceTimeKey(0, 1, time_2), tile2),
             (SpaceTimeKey(1, 1, time_2), tile2),
             (SpaceTimeKey(0, 0, time_3), tile),
             (SpaceTimeKey(1, 0, time_3), tile2),
             (SpaceTimeKey(0, 1, time_3), tile),
             (SpaceTimeKey(1, 1, time_3), tile)]

    rdd = SparkContext.getOrCreate().parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    # NOTE(review): this 'layout' dict is not referenced below — the metadata
    # inlines an equivalent tileLayout literal instead.
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0,
                'instant': _convert_to_unix_time(time_1)
            },
            'maxKey': {
                'col': 1,
                'row': 1,
                'instant': _convert_to_unix_time(time_3)
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {
                'tileCols': 5,
                'tileRows': 5,
                'layoutCols': 2,
                'layoutRows': 2
            }
        }
    }
    # Minimal openEO cube metadata: only the temporal dimension is declared.
    collection_metadata = GeopysparkCubeMetadata(
        {"cube:dimensions": {
            "t": {
                "type": "temporal"
            },
        }})

    tiled_raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME,
                                                       rdd, metadata)

    # Untiled variant of the layer, keyed by (extent, epsg, instant).
    layer2 = [(TemporalProjectedExtent(Extent(0, 0, 1, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_3), tile)]

    rdd2 = SparkContext.getOrCreate().parallelize(layer2)
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd2)

    # Sample points for the timeseries tests; some lie outside the extent.
    points = [
        Point(1.0, -3.0),
        Point(0.5, 0.5),
        Point(20.0, 3.0),
        Point(1.0, -2.0),
        Point(-10.0, 15.0)
    ]

    def setUp(self):
        """Create a ./tmp working folder for files written by the tests."""
        # TODO: make this reusable (or a pytest fixture)
        self.temp_folder = Path.cwd() / 'tmp'
        # exist_ok avoids the check-then-create race of exists()/mkdir().
        self.temp_folder.mkdir(parents=True, exist_ok=True)
        assert self.temp_folder.is_dir()

    def test_reproject_spatial(self):
        """Reprojecting to EPSG:3395 yields a mercator CRS and keeps the raster shape."""
        cube = GeopysparkDataCube(pyramid=Pyramid({0: self.tiled_raster_rdd}),
                                  metadata=self.collection_metadata)

        ref_path = str(self.temp_folder / "reproj_ref.tiff")
        cube.reduce('max', dimension="t").save_result(ref_path, format="GTIFF")

        reprojected = cube.resample_spatial(resolution=0,
                                            projection="EPSG:3395",
                                            method="max")
        layer_metadata = reprojected.pyramid.levels[0].layer_metadata
        print(layer_metadata)
        self.assertTrue("proj=merc" in layer_metadata.crs)

        out_path = str(self.temp_folder / "reprojected.tiff")
        reduced = reprojected.reduce('max', dimension="t")
        reduced.save_result(out_path, format="GTIFF")

        with rasterio.open(ref_path) as ref_ds:
            with rasterio.open(out_path) as ds:
                print(ds.profile)
                # This reprojection does not change the shape, so compare directly.
                assert ds.read().shape == ref_ds.read().shape

                assert (ds.crs.to_epsg() == 3395)

    def test_reduce(self):
        """Each temporal reducer collapses the time dimension to the expected pixels."""
        cube = GeopysparkDataCube(pyramid=Pyramid({0: self.tiled_raster_rdd}),
                                  metadata=self.collection_metadata)
        env = EvalEnv()

        def stitched_for(reducer_name):
            # Reduce along time with the named process and stitch level 0.
            reduced = cube.reduce_dimension(dimension="t",
                                            reducer=reducer(reducer_name),
                                            env=env)
            result = reduced.pyramid.levels[0].stitch()
            print(result)
            return result

        stitched = stitched_for("max")
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(2.0, stitched.cells[0][0][1])

        stitched = stitched_for("min")
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(1.0, stitched.cells[0][0][1])

        stitched = stitched_for("sum")
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(4.0, stitched.cells[0][0][1])

        stitched = stitched_for("mean")
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(1.3333333, stitched.cells[0][0][1])

        stitched = stitched_for("variance")
        self.assertEqual(0.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(0.2222222, stitched.cells[0][0][1])

        stitched = stitched_for("sd")
        self.assertEqual(0.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(0.4714045, stitched.cells[0][0][1])

    def test_reduce_all_data(self):
        """Temporal reducers over a two-date single-pixel layer with no nodata."""
        def ts(text):
            return datetime.datetime.strptime(text, '%Y-%m-%dT%H:%M:%SZ')

        layer = self._single_pixel_layer({
            ts("2016-04-24T04:00:00Z"): 1.0,
            ts("2017-04-24T04:00:00Z"): 5.0,
        })
        cube = GeopysparkDataCube(pyramid=Pyramid({0: layer}),
                                  metadata=self.collection_metadata)
        env = EvalEnv()

        def stitched_for(reducer_name):
            reduced = cube.reduce_dimension(reducer=reducer(reducer_name),
                                            dimension="t",
                                            env=env)
            return reduced.pyramid.levels[0].stitch()

        self.assertEqual(1.0, stitched_for("min").cells[0][0][0])
        self.assertEqual(5.0, stitched_for("max").cells[0][0][0])
        self.assertEqual(6.0, stitched_for("sum").cells[0][0][0])
        self.assertAlmostEqual(3.0,
                               stitched_for("mean").cells[0][0][0],
                               delta=0.001)
        self.assertAlmostEqual(4.0,
                               stitched_for("variance").cells[0][0][0],
                               delta=0.001)
        self.assertAlmostEqual(2.0,
                               stitched_for("sd").cells[0][0][0],
                               delta=0.001)

    def test_reduce_some_nodata(self):
        """With one nodata observation, reducers only see the single valid value 5.0."""
        no_data = -1.0

        def ts(text):
            return datetime.datetime.strptime(text, '%Y-%m-%dT%H:%M:%SZ')

        layer = self._single_pixel_layer(
            {
                ts("2016-04-24T04:00:00Z"): no_data,
                ts("2017-04-24T04:00:00Z"): 5.0,
            }, no_data)
        imagecollection = GeopysparkDataCube(pyramid=Pyramid({0: layer}),
                                             metadata=self.collection_metadata)

        def stitched_for(reducer_name):
            reduced = imagecollection.reduce(reducer_name, dimension="t")
            return reduced.pyramid.levels[0].stitch()

        self.assertEqual(5.0, stitched_for("min").cells[0][0][0])
        self.assertEqual(5.0, stitched_for("max").cells[0][0][0])
        self.assertEqual(5.0, stitched_for("sum").cells[0][0][0])
        self.assertAlmostEqual(5.0,
                               stitched_for("mean").cells[0][0][0],
                               delta=0.001)
        self.assertAlmostEqual(0.0,
                               stitched_for("variance").cells[0][0][0],
                               delta=0.001)
        self.assertAlmostEqual(0.0,
                               stitched_for("sd").cells[0][0][0],
                               delta=0.001)

    def test_reduce_tiles(self):
        """Sanity-check numpy aggregation over a stack of single-pixel tiles.

        The original version only printed the variance and asserted nothing;
        for values {1, 5} the expected statistics are variance 4 and std 2.
        """
        tile1 = self._single_pixel_tile(1)
        tile2 = self._single_pixel_tile(5)

        cube = np.array([tile1.cells, tile2.cells])

        # "MIN", "MAX", "SUM", "MEAN", "VARIANCE"

        std = np.std(cube, axis=0)
        var = np.var(cube, axis=0)
        print(var)
        assert_array_almost_equal(var, np.array([[4.0]]))
        assert_array_almost_equal(std, np.array([[2.0]]))

    @staticmethod
    def _single_pixel_tile(value, no_data=-1.0):
        """Build a 1x1 geopyspark Tile holding a single cell value."""
        return Tile.from_numpy_array(np.array([[value]]), no_data)

    def _single_pixel_layer(self, grid_value_by_datetime, no_data=-1.0):
        """Build a 1x1 single-band SPACETIME TiledRasterLayer, one tile per timestamp.

        :param grid_value_by_datetime: maps datetime -> the single cell value
            at that instant
        :param no_data: nodata value, also encoded in the layer's cell type
        """
        from collections import OrderedDict

        sorted_by_datetime = OrderedDict(sorted(
            grid_value_by_datetime.items()))

        # Build a flat list of (SpaceTimeKey, Tile) pairs. The previous version
        # wrapped each pair in its own single-element list, yielding a nested
        # list instead of the flat sequence of tuples that the other layer
        # factories feed to from_numpy_rdd.
        layer = [(SpaceTimeKey(0, 0, timestamp),
                  self._single_pixel_tile(value, no_data))
                 for timestamp, value in sorted_by_datetime.items()]
        rdd = SparkContext.getOrCreate().parallelize(layer)

        datetimes = list(sorted_by_datetime.keys())

        extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 1.0, 'ymax': 1.0}
        layout = {
            'layoutCols': 1,
            'layoutRows': 1,
            'tileCols': 1,
            'tileRows': 1
        }
        metadata = {
            'cellType': 'float32ud%f' % no_data,
            'extent': extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {
                    'col': 0,
                    'row': 0,
                    'instant': _convert_to_unix_time(datetimes[0])
                },
                'maxKey': {
                    'col': 0,
                    'row': 0,
                    'instant': _convert_to_unix_time(datetimes[-1])
                }
            },
            'layoutDefinition': {
                'extent': extent,
                'tileLayout': layout
            }
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd,
                                               metadata)

    def test_reduce_nontemporal(self):
        """Reducing along an unknown dimension must raise FeatureUnsupportedException."""
        cube = GeopysparkDataCube(pyramid=Pyramid({0: self.tiled_raster_rdd}),
                                  metadata=self.collection_metadata)
        with self.assertRaises(FeatureUnsupportedException) as context:
            cube.reduce("max", dimension="gender").pyramid.levels[0].stitch()
        print(context.exception)

    def test_aggregate_temporal(self):
        """Exercise aggregate_temporal with the deprecated flat interval-list spec.

        To be phased out.
        """
        self._test_aggregate_temporal(["2017-01-01", "2018-01-01"])

    def _median_reducer(self):
        """Return a flat process graph applying the openEO ``median`` process."""
        from openeo.processes import median
        return median({"from_argument": "data"}).flat_graph()

    def test_aggregate_temporal_median(self):
        """Median aggregation over three dates matches the per-pixel numpy median."""
        cube = GeopysparkDataCube(pyramid=Pyramid({0: self.tiled_raster_rdd}),
                                  metadata=self.collection_metadata)
        aggregated = cube.aggregate_temporal(["2015-01-01", "2018-01-01"],
                                             ["2017-01-03"],
                                             self._median_reducer(),
                                             dimension="t")
        stitched = aggregated.pyramid.levels[0].to_spatial_layer().stitch()
        print(stitched)
        expected_median = np.median(
            [self.tile.cells, self.tile2.cells, self.tile.cells], axis=0)
        #TODO nodata handling??
        assert_array_almost_equal(stitched.cells[0, 1:2, 1:2],
                                  expected_median[1:2, 1:2])

    def _test_aggregate_temporal(self, interval_list):
        """Aggregate with 'min' over the given interval(s) and compare against numpy.

        :param interval_list: intervals in either the deprecated flat form or
            the list-of-pairs form accepted by aggregate_temporal.
        """
        imagecollection = GeopysparkDataCube(pyramid=Pyramid({0: self.tiled_raster_rdd}),
                                             metadata=self.collection_metadata)
        stitched = (imagecollection.aggregate_temporal(
            interval_list, ["2017-01-03"], "min",
            dimension="t").pyramid.levels[0].to_spatial_layer().stitch())
        print(stitched)
        # Renamed from 'expected_max': the reducer (and numpy reference) is a minimum.
        expected_min = np.min([self.tile2.cells, self.tile.cells], axis=0)
        assert_array_almost_equal(stitched.cells[0, 0:5, 0:5], expected_min)

    def test_aggregate_temporal_100(self):
        """Same as test_aggregate_temporal, but with the list-of-pairs interval form."""
        interval_list = [["2017-01-01", "2018-01-01"]]
        self._test_aggregate_temporal(interval_list)

    def test_max_aggregator(self):
        """max_composite keeps the per-cell maximum of the input tiles."""
        composite = max_composite([self.tile, self.tile2])
        self.assertEqual(2.0, composite.cells[0][0])

    def test_aggregate_max_time(self):
        """Reducing time with 'max' keeps FLOAT32 cells and yields the maximum value."""
        cube = GeopysparkDataCube(pyramid=Pyramid({0: self.tiled_raster_rdd}),
                                  metadata=self.collection_metadata)

        layer = cube.reduce('max', dimension='t').pyramid.levels[0]
        assert CellType.FLOAT32.value == layer.layer_metadata.cell_type
        stitched = layer.stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])

    def test_min_time(self):
        """Min/max temporal reduction, checked via stitch and via point timeseries."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})

        cube = GeopysparkDataCube(pyramid=pyramid,
                                  metadata=self.collection_metadata)
        env = EvalEnv()
        min_time = cube.reduce_dimension(reducer=reducer('min'),
                                         dimension='t',
                                         env=env)
        max_time = cube.reduce_dimension(reducer=reducer('max'),
                                         dimension='t',
                                         env=env)

        stitched = min_time.pyramid.levels[0].stitch()
        print(stitched)

        # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
        self.assertEqual(2.0, stitched.cells[0][0][0])

        for p in self.points[1:3]:
            result = min_time.timeseries(p.x, p.y, srs="EPSG:3857")
            print(result)
            print(cube.timeseries(p.x, p.y, srs="EPSG:3857"))
            max_result = max_time.timeseries(p.x, p.y, srs="EPSG:3857")
            self.assertEqual(1.0, result['NoDate'])
            self.assertEqual(2.0, max_result['NoDate'])

    def test_apply_dimension_spatiotemporal(self):

        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeopysparkDataCube(
            pyramid=input,
            metadata=GeopysparkCubeMetadata({
                "cube:dimensions": {
                    # TODO: also specify other dimensions?
                    "bands": {
                        "type": "bands",
                        "values": ["2"]
                    }
                },
                "summaries": {
                    "eo:bands": [{
                        "name": "2",
                        "common_name": "blue",
                        "wavelength_nm": 496.6,
                        "res_m": 10,
                        "scale": 0.0001,
                        "offset": 0,
                        "type": "int16",
                        "unit": "1"
                    }]
                }
            }))

        udf_code = """
def rct_savitzky_golay(udf_data:UdfData):
    from scipy.signal import savgol_filter

    print(udf_data.get_datacube_list())
    return udf_data
        
        """

        result = imagecollection.apply_tiles_spatiotemporal(udf_code)
        local_tiles = result.pyramid.levels[0].to_numpy_rdd().collect()
        print(local_tiles)
        self.assertEquals(len(TestMultipleDates.layer), len(local_tiles))
        ref_dict = {
            e[0]: e[1]
            for e in imagecollection.pyramid.levels[0].convert_data_type(
                CellType.FLOAT64).to_numpy_rdd().collect()
        }
        result_dict = {e[0]: e[1] for e in local_tiles}
        for k, v in ref_dict.items():
            tile = result_dict[k]
            assert_array_almost_equal(np.squeeze(v.cells),
                                      np.squeeze(tile.cells),
                                      decimal=2)

    def test_mask_raster_replacement_default_none(self):
        """Without a replacement value, masked pixels become nodata (NaN after stitch)."""
        def create_mask(tile):
            tile.cells[0][0][0] = 0.0
            return tile

        cube = GeopysparkDataCube(pyramid=Pyramid({0: self.tiled_raster_rdd}),
                                  metadata=self.collection_metadata)
        mask_cube = GeopysparkDataCube(pyramid=Pyramid(
            {0: self.tiled_raster_rdd.map_tiles(create_mask)}))
        stitched = cube.mask(mask=mask_cube).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()
        print(stitched)
        assert stitched.cells[0][0][0] == 2.0
        assert np.isnan(stitched.cells[0][0][1])

    def test_mask_raster_replacement_float(self):
        """Masked pixels are replaced with the given float value (10.0)."""
        def create_mask(tile):
            tile.cells[0][0][0] = 0.0
            return tile

        cube = GeopysparkDataCube(pyramid=Pyramid({0: self.tiled_raster_rdd}),
                                  metadata=self.collection_metadata)
        mask_cube = GeopysparkDataCube(pyramid=Pyramid(
            {0: self.tiled_raster_rdd.map_tiles(create_mask)}))
        stitched = cube.mask(mask=mask_cube, replacement=10.0).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()
        print(stitched)
        assert stitched.cells[0][0][0] == 2.0
        assert stitched.cells[0][0][1] == 10.0

    def test_mask_raster_replacement_int(self):
        """Masked pixels are replaced with the given integer value (10)."""
        def create_mask(tile):
            tile.cells[0][0][0] = 0.0
            return tile

        cube = GeopysparkDataCube(pyramid=Pyramid({0: self.tiled_raster_rdd}),
                                  metadata=self.collection_metadata)
        mask_cube = GeopysparkDataCube(pyramid=Pyramid(
            {0: self.tiled_raster_rdd.map_tiles(create_mask)}))
        stitched = cube.mask(mask=mask_cube, replacement=10).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()
        print(stitched)
        assert stitched.cells[0][0][0] == 2.0
        assert stitched.cells[0][0][1] == 10.0

    def test_apply_kernel_float(self):
        """Plus-shaped float kernel with factor 2.0; check max-over-time pixels."""
        kernel = np.array([[0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0]])

        cube = GeopysparkDataCube(pyramid=Pyramid({0: self.tiled_raster_rdd}),
                                  metadata=self.collection_metadata)
        stitched = cube.apply_kernel(kernel, 2.0).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()

        assert stitched.cells[0][0][0] == 12.0
        assert stitched.cells[0][0][1] == 16.0
        assert stitched.cells[0][1][1] == 20.0

    def test_apply_kernel_int(self):
        """Plus-shaped integer kernel, default factor; check max-over-time pixels."""
        kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])

        cube = GeopysparkDataCube(pyramid=Pyramid({0: self.tiled_raster_rdd}),
                                  metadata=self.collection_metadata)
        stitched = cube.apply_kernel(kernel).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()

        assert stitched.cells[0][0][0] == 6.0
        assert stitched.cells[0][0][1] == 8.0
        assert stitched.cells[0][1][1] == 10.0

    def test_resample_spatial(self):
        """Resampling to 0.05 degree resolution is reflected in the saved GeoTIFF."""
        import rasterio

        cube = GeopysparkDataCube(pyramid=Pyramid({0: self.tiled_raster_rdd}),
                                  metadata=self.collection_metadata)
        resampled = cube.resample_spatial(resolution=0.05)

        out_path = str(self.temp_folder / "resampled.tiff")
        reduced = resampled.reduce('max', dimension="t")
        reduced.save_result(out_path, format="GTIFF")

        with rasterio.open(out_path) as ds:
            print(ds.profile)
            self.assertAlmostEqual(0.05, ds.res[0], 3)

    def test_rename_dimension(self):
        """Renaming the time dimension yields metadata with the new dimension name."""
        cube = GeopysparkDataCube(pyramid=Pyramid({0: self.tiled_raster_rdd}),
                                  metadata=self.collection_metadata)

        renamed = cube.rename_dimension('t', 'myNewTimeDim')

        renamed.metadata.assert_valid_dimension('myNewTimeDim')
Пример #12
0
class ToSpatialLayerTest(BaseTestClass):
    """Tests for to_spatial_layer on both tiled and untiled SPACETIME layers.

    The class body builds, at import time, a two-instant layer: all-1.0 tiles
    at time_1 and all-2.0 tiles at time_2, in a 2x2 grid of 5x5-pixel tiles.
    """

    # 5x5 band of constant 1.0 (used for time_1).
    band_1 = np.array([[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                       [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                       [1.0, 1.0, 1.0, 1.0, 1.0]])

    # 5x5 band of constant 2.0 (used for time_2).
    band_2 = np.array([[2.0, 2.0, 2.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                       [2.0, 2.0, 2.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                       [2.0, 2.0, 2.0, 2.0, 2.0]])

    tile_1 = Tile.from_numpy_array(np.array([band_1]))
    tile_2 = Tile.from_numpy_array(np.array([band_2]))
    time_1 = datetime.datetime.strptime("2016-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_2 = datetime.datetime.strptime("2017-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')

    # 2x2 grid of tiles per instant: tile_1 at time_1, tile_2 at time_2.
    layer = [(SpaceTimeKey(0, 0, time_1), tile_1),
             (SpaceTimeKey(1, 0, time_1), tile_1),
             (SpaceTimeKey(0, 1, time_1), tile_1),
             (SpaceTimeKey(1, 1, time_1), tile_1),
             (SpaceTimeKey(0, 0, time_2), tile_2),
             (SpaceTimeKey(1, 0, time_2), tile_2),
             (SpaceTimeKey(0, 1, time_2), tile_2),
             (SpaceTimeKey(1, 1, time_2), tile_2)]

    rdd = BaseTestClass.pysc.parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    # NOTE(review): this 'layout' dict is not referenced below — the metadata
    # inlines an equivalent tileLayout literal instead.
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0,
                'instant': _convert_to_unix_time(time_1)
            },
            'maxKey': {
                'col': 1,
                'row': 1,
                'instant': _convert_to_unix_time(time_2)
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {
                'tileCols': 5,
                'tileRows': 5,
                'layoutCols': 2,
                'layoutRows': 2
            }
        }
    }

    tiled_raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME,
                                                       rdd, metadata)

    # Untiled variant of the layer, keyed by (extent, epsg, instant).
    layer2 = [(TemporalProjectedExtent(Extent(0, 0, 1, 1),
                                       epsg=3857,
                                       instant=time_1), tile_1),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_1), tile_1),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_1), tile_1),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_1), tile_1),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile_2),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile_2),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_2), tile_2),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_2), tile_2)]

    rdd2 = BaseTestClass.pysc.parallelize(layer2)
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd2)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the py4j JVM gateway after each test so Spark resources are released."""
        yield
        BaseTestClass.pysc._gateway.close()

    # This test should be moved to a more appropriate file once more spatial-temporal
    # tests are made.
    def test_spatial_metadata(self):
        """collect_metadata keeps the temporal instants in the key bounds."""
        layer_metadata = self.raster_rdd.collect_metadata()

        self.assertEqual(layer_metadata.bounds.minKey.instant, self.time_1)
        self.assertEqual(layer_metadata.bounds.maxKey.instant, self.time_2)

    def test_to_spatial_raster_layer(self):
        """Dropping time yields one ProjectedExtent key per tile extent."""
        expected = [
            ProjectedExtent(Extent(0, 0, 1, 1), 3857),
            ProjectedExtent(Extent(1, 0, 2, 1), 3857),
            ProjectedExtent(Extent(0, 1, 1, 2), 3857),
            ProjectedExtent(Extent(1, 1, 2, 2), 3857)
        ]

        spatial_keys = (self.raster_rdd.to_spatial_layer()
                        .to_numpy_rdd().keys().collect())

        for key in spatial_keys:
            self.assertTrue(key in expected)

    def test_to_spatial_target_time_raster_layer(self):
        """Selecting time_1 keeps only that instant's tiles (all cells == 1.0)."""
        converted = self.raster_rdd.to_spatial_layer(target_time=self.time_1)

        expected = [
            ProjectedExtent(Extent(0, 0, 1, 1), 3857),
            ProjectedExtent(Extent(1, 0, 2, 1), 3857),
            ProjectedExtent(Extent(0, 1, 1, 2), 3857),
            ProjectedExtent(Extent(1, 1, 2, 2), 3857)
        ]

        for key in converted.to_numpy_rdd().keys().collect():
            self.assertTrue(key in expected)

        for tile in converted.to_numpy_rdd().values().collect():
            self.assertEqual(tile.cells.shape, self.tile_1.cells.shape)
            self.assertTrue((tile.cells == 1.0).all())

    def test_to_spatial_tiled_layer(self):
        """Dropping time on the tiled layer yields the 2x2 grid of SpatialKeys."""
        expected = [
            SpatialKey(0, 0),
            SpatialKey(1, 0),
            SpatialKey(0, 1),
            SpatialKey(1, 1)
        ]

        spatial_keys = (self.tiled_raster_rdd.to_spatial_layer()
                        .to_numpy_rdd().keys().collect())

        for key in spatial_keys:
            self.assertTrue(key in expected)

    def test_to_spatial_target_time_tiled_layer(self):
        """Selecting time_2 keeps only that instant's tiles (all cells == 2.0)."""
        converted = self.tiled_raster_rdd.to_spatial_layer(
            target_time=self.time_2)

        expected = [
            SpatialKey(0, 0),
            SpatialKey(1, 0),
            SpatialKey(0, 1),
            SpatialKey(1, 1)
        ]

        for key in converted.to_numpy_rdd().keys().collect():
            self.assertTrue(key in expected)

        for tile in converted.to_numpy_rdd().values().collect():
            self.assertEqual(tile.cells.shape, self.tile_2.cells.shape)
            self.assertTrue((tile.cells == 2.0).all())
Пример #13
0
class FilterByTimesTest(BaseTestClass):
    """Tests for filter_by_times() on both TiledRasterLayer (SpaceTimeKey
    keys) and RasterLayer (TemporalProjectedExtent keys).

    The test methods below compare filter results against slices of the
    fixture lists (``self.layer[:8]`` etc.), so the exact ordering of the
    entries in ``layer`` and ``layer2`` is load-bearing — do not reorder.
    """

    # Single 5x5 band of all 1.0; every tile in the fixtures shares it.
    band = np.array([[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                     [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                     [1.0, 1.0, 1.0, 1.0, 1.0]])

    tile = Tile.from_numpy_array(band)
    # Three distinct instants; time_1 vs (time_2, time_3) is the split the
    # filter tests exercise.
    time_1 = datetime.datetime.strptime("2016-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_2 = datetime.datetime.strptime("2017-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_3 = datetime.datetime.strptime("2017-10-17T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')

    # 2x2 grid of SpaceTimeKeys at each of the three instants: entries
    # [0:4] are time_1, [4:8] time_2, [8:12] time_3 (slices used below).
    layer = [(SpaceTimeKey(0, 0, time_1), tile),
             (SpaceTimeKey(1, 0, time_1), tile),
             (SpaceTimeKey(0, 1, time_1), tile),
             (SpaceTimeKey(1, 1, time_1), tile),
             (SpaceTimeKey(0, 0, time_2), tile),
             (SpaceTimeKey(1, 0, time_2), tile),
             (SpaceTimeKey(0, 1, time_2), tile),
             (SpaceTimeKey(1, 1, time_2), tile),
             (SpaceTimeKey(0, 0, time_3), tile),
             (SpaceTimeKey(1, 0, time_3), tile),
             (SpaceTimeKey(0, 1, time_3), tile),
             (SpaceTimeKey(1, 1, time_3), tile)]

    rdd = BaseTestClass.pysc.parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    # NOTE(review): `layout` appears unused — the metadata below inlines an
    # identical tileLayout dict instead of referencing it; confirm intent.
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    # Layer metadata spanning the full key bounds (col/row 0..1, time_1..time_3).
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0,
                'instant': _convert_to_unix_time(time_1)
            },
            'maxKey': {
                'col': 1,
                'row': 1,
                'instant': _convert_to_unix_time(time_3)
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {
                'tileCols': 5,
                'tileRows': 5,
                'layoutCols': 2,
                'layoutRows': 2
            }
        }
    }

    tiled_raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME,
                                                       rdd, metadata)

    # Un-tiled counterpart keyed by TemporalProjectedExtent: entries [0:4]
    # are time_1, [4:8] time_2, [8:12] time_3 (slices used below).
    # NOTE(review): for time_2 and time_3 the extent Extent(1, 0, 2, 1)
    # appears twice while Extent(0, 0, 1, 1) is absent — looks like a
    # copy-paste slip; the tests still pass because expectations are slices
    # of this same list, but confirm whether the duplication is intended.
    layer2 = [(TemporalProjectedExtent(Extent(0, 0, 1, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_3), tile)]

    rdd2 = BaseTestClass.pysc.parallelize(layer2)
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd2)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        # Runs after each test: close the py4j gateway so Spark resources
        # are released between test classes.
        yield
        BaseTestClass.pysc._gateway.close()

    def test_filter_temporal_projected_extent_single_time(self):
        """Filtering the RasterLayer by [time_1] keeps exactly the first
        four entries of layer2 (keys and cell data unchanged)."""
        result = self.raster_rdd.filter_by_times([self.time_1])
        expected = self.layer2[:4]
        actual = result.to_numpy_rdd().collect()

        self.assertEqual(len(expected), len(actual))

        for x, y in zip(expected, actual):
            self.assertEqual(x[0], y[0])
            self.assertTrue((x[1].cells == y[1].cells).all())

    def test_filter_temporal_projected_extent_multi_intervals(self):
        """Filtering the RasterLayer by [time_2, time_3] keeps the last
        eight entries of layer2."""
        result = self.raster_rdd.filter_by_times([self.time_2, self.time_3])
        expected = self.layer2[4:]
        actual = result.to_numpy_rdd().collect()

        self.assertEqual(len(expected), len(actual))

        for x, y in zip(expected, actual):
            self.assertEqual(x[0], y[0])
            self.assertTrue((x[1].cells == y[1].cells).all())

    def test_filter_spacetime_key_single_time(self):
        """Filtering the tiled layer by [time_3] keeps exactly the last
        four entries of layer."""
        result = self.tiled_raster_rdd.filter_by_times([self.time_3])
        expected = self.layer[8:]
        actual = result.to_numpy_rdd().collect()

        self.assertEqual(len(expected), len(actual))

        for x, y in zip(expected, actual):
            self.assertEqual(x[0], y[0])
            self.assertTrue((x[1].cells == y[1].cells).all())

    def test_filter_spacetime_key_multi_intervals(self):
        """Filtering the tiled layer by [time_1, time_2] keeps the first
        eight entries of layer."""
        result = self.tiled_raster_rdd.filter_by_times(
            [self.time_1, self.time_2])
        expected = self.layer[:8]
        actual = result.to_numpy_rdd().collect()

        self.assertEqual(len(expected), len(actual))

        for x, y in zip(expected, actual):
            self.assertEqual(x[0], y[0])
            self.assertTrue((x[1].cells == y[1].cells).all())