def test_space_time_keys(self):
    temp_keys = [
        SpaceTimeKey(0, 0, instant=self.time),
        SpaceTimeKey(0, 1, instant=self.time)
    ]

    # Each key appears twice; merge() should keep the later tile (tile_2),
    # mirroring the spatial merge test below.
    temp_key_layer = [(temp_keys[0], self.tile_1),
                      (temp_keys[1], self.tile_1),
                      (temp_keys[0], self.tile_2),
                      (temp_keys[1], self.tile_2)]

    temp_bounds = Bounds(temp_keys[0], temp_keys[1])
    temp_md = Metadata(bounds=temp_bounds,
                       crs=self.md_proj,
                       cell_type=self.ct,
                       extent=self.extent,
                       layout_definition=self.ld)

    rdd = self.pysc.parallelize(temp_key_layer)
    layer = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, temp_md)

    actual = layer.merge()

    self.assertEqual(actual.srdd.rdd().count(), 2)

    for k, v in actual.to_numpy_rdd().collect():
        self.assertTrue((v.cells == self.arr_2).all())
def test_spatial_keys(self):
    keys = [
        SpatialKey(0, 0),
        SpatialKey(0, 1),
        SpatialKey(1, 0),
        SpatialKey(1, 1)
    ]

    key_layer = [(keys[0], self.tile), (keys[1], self.tile),
                 (keys[2], self.tile), (keys[3], self.tile)]

    bounds = Bounds(keys[0], keys[3])
    md = Metadata(bounds=bounds,
                  crs=self.md_proj,
                  cell_type=self.ct,
                  extent=self.extent,
                  layout_definition=self.ld)

    rdd = self.pysc.parallelize(key_layer)
    layer = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd, md)

    actual = layer.collect_keys()

    for x in actual:
        self.assertTrue(x in keys)
def test_space_time_keys(self):
    temp_keys = [
        SpaceTimeKey(0, 0, instant=self.time),
        SpaceTimeKey(0, 1, instant=self.time),
        SpaceTimeKey(1, 0, instant=self.time),
        SpaceTimeKey(1, 1, instant=self.time)
    ]

    temp_key_layer = [(temp_keys[0], self.tile), (temp_keys[1], self.tile),
                      (temp_keys[2], self.tile), (temp_keys[3], self.tile)]

    temp_bounds = Bounds(temp_keys[0], temp_keys[3])
    temp_md = Metadata(bounds=temp_bounds,
                       crs=self.md_proj,
                       cell_type=self.ct,
                       extent=self.extent,
                       layout_definition=self.ld)

    rdd = self.pysc.parallelize(temp_key_layer)
    layer = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, temp_md)

    actual = layer.collect_keys()

    for x in actual:
        self.assertTrue(x in temp_keys)
def layer_metadata(self):
    zoom = self.layer_zoom or 0
    value_json = self.store.wrapper.readMetadata(self.layer_name, zoom)

    if value_json:
        metadata_dict = json.loads(value_json)
        return Metadata.from_dict(metadata_dict)
    else:
        raise KeyError(self.store.uri, self.layer_name, self.layer_zoom, "layer metadata")
# Imports assumed from geopyspark's test suite layout; exact paths may differ
# between geopyspark versions.
import numpy as np
import pytest
from shapely.geometry import box

from geopyspark.geotrellis import (SpatialKey, Tile, Metadata, RasterizerOptions,
                                   SpatialPartitionStrategy)
from geopyspark.geotrellis.constants import LayerType
from geopyspark.geotrellis.layer import TiledRasterLayer
from geopyspark.tests.base_test_class import BaseTestClass


class MaskTest(BaseTestClass):
    pysc = BaseTestClass.pysc
    cells = np.zeros((1, 2, 2))
    cells.fill(1)

    layer = [(SpatialKey(0, 0), Tile(cells, 'FLOAT', -1.0)),
             (SpatialKey(1, 0), Tile(cells, 'FLOAT', -1.0)),
             (SpatialKey(0, 1), Tile(cells, 'FLOAT', -1.0)),
             (SpatialKey(1, 1), Tile(cells, 'FLOAT', -1.0))]
    rdd = pysc.parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 4.0, 'ymax': 4.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 2, 'tileRows': 2}
    metadata = {'cellType': 'float32ud-1.0',
                'extent': extent,
                'crs': 4326,
                'bounds': {
                    'minKey': {'col': 0, 'row': 0},
                    'maxKey': {'col': 1, 'row': 1}},
                'layoutDefinition': {
                    'extent': extent,
                    'tileLayout': layout}}

    geoms = [box(0.0, 0.0, 2.0, 2.0), box(3.0, 3.0, 4.0, 4.0)]
    raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd,
                                                 Metadata.from_dict(metadata))

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def test_geotrellis_mask(self):
        result = self.raster_rdd.mask(geometries=self.geoms).to_numpy_rdd()
        n = result.map(lambda kv: np.sum(kv[1].cells)).reduce(lambda a, b: a + b)
        self.assertEqual(n, 2.0)

    def test_rdd_mask_no_partition_strategy(self):
        rdd = BaseTestClass.pysc.parallelize(self.geoms)
        result = self.raster_rdd.mask(rdd, options=RasterizerOptions(True, 'PixelIsArea')).to_numpy_rdd()

        n = result.map(lambda kv: np.sum(kv[1].cells)).reduce(lambda a, b: a + b)
        self.assertEqual(n, 2.0)

    def test_rdd_mask_with_partition_strategy(self):
        rdd = BaseTestClass.pysc.parallelize(self.geoms)
        result = self.raster_rdd.mask(rdd, partition_strategy=SpatialPartitionStrategy()).to_numpy_rdd()

        n = result.map(lambda kv: np.sum(kv[1].cells)).reduce(lambda a, b: a + b)
        self.assertEqual(n, 2.0)
def collect_metadata(self, extent=None, layout=None, crs=None, tile_size=256):
    """Iterate over the RDD records and generate layer metadata describing the
    contained rasters.

    Args:
        extent (:class:`~geopyspark.geotrellis.Extent`, optional):
            Specify layout extent, must also specify ``layout``.
        layout (:obj:`~geopyspark.geotrellis.TileLayout`, optional):
            Specify tile layout, must also specify ``extent``.
        crs (str or int, optional):
            Ignore CRS from records and use given one instead.
        tile_size (int, optional):
            Pixel dimensions of each tile, if not using ``layout``.

    Note:
        ``extent`` and ``layout`` must both be defined if they are to be used.

    Returns:
        :class:`~geopyspark.geotrellis.Metadata`

    Raises:
        TypeError: If either ``extent`` or ``layout`` is not defined but the other is.
    """

    if extent and not isinstance(extent, dict):
        extent = extent._asdict()

    if layout and not isinstance(layout, dict):
        layout = layout._asdict()

    if not crs:
        crs = ""

    if isinstance(crs, int):
        crs = str(crs)

    if extent and layout:
        json_metadata = self.srdd.collectMetadata(extent, layout, crs)
    elif not extent and not layout:
        json_metadata = self.srdd.collectMetadata(str(tile_size), crs)
    else:
        raise TypeError("Could not collect metadata with {} and {}".format(extent, layout))

    return Metadata.from_dict(json.loads(json_metadata))
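# A minimal usage sketch for collect_metadata, assuming an existing RasterLayer
# named `raster_layer` (not defined in this snippet); the Extent and TileLayout
# values below are illustrative only.
from geopyspark.geotrellis import Extent, TileLayout

layout_extent = Extent(0.0, 0.0, 33.0, 33.0)
tile_layout = TileLayout(layoutCols=2, layoutRows=2, tileCols=256, tileRows=256)

# Either supply extent and layout together...
md = raster_layer.collect_metadata(extent=layout_extent, layout=tile_layout)

# ...or supply neither, letting tile_size drive the layout.
md_default = raster_layer.collect_metadata(tile_size=256)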
def read_layer_metadata(geopysc,
                        rdd_type,
                        uri,
                        layer_name,
                        layer_zoom,
                        options=None,
                        **kwargs):
    """Reads the metadata from a saved layer without reading in the whole layer.

    Args:
        geopysc (:class:`~geopyspark.GeoPyContext`): The ``GeoPyContext`` being used for
            this session.
        rdd_type (str): The spatial type of the geotiffs, represented by the constants
            ``SPATIAL`` and ``SPACETIME``.
        uri (str): The Uniform Resource Identifier used to point towards the desired
            GeoTrellis catalog to be read from. The shape of this string varies depending
            on backend.
        layer_name (str): The name of the GeoTrellis catalog to be read from.
        layer_zoom (int): The zoom level of the layer that is to be read.
        options (dict, optional): Additional parameters for reading the layer for specific
            backends. The dictionary is only used for ``Cassandra`` and ``HBase``; no other
            backend requires this to be set.
        numPartitions (int, optional): Sets RDD partition count when reading from catalog.
        **kwargs: The optional parameters can also be set as keyword arguments. The keywords
            must be in camel case. If both options and keywords are set, then the options
            will be used.

    Returns:
        :class:`~geopyspark.geotrellis.Metadata`
    """

    options = options or kwargs or {}

    _construct_catalog(geopysc, uri, options)
    cached = _mapped_cached[uri]

    if rdd_type == SPATIAL:
        metadata = cached.store.metadataSpatial(layer_name, layer_zoom)
    else:
        metadata = cached.store.metadataSpaceTime(layer_name, layer_zoom)

    return Metadata.from_dict(json.loads(metadata))
def test_spatial_keys(self):
    keys = [SpatialKey(0, 0), SpatialKey(0, 1)]

    key_layer = [(keys[0], self.tile_1), (keys[1], self.tile_1),
                 (keys[0], self.tile_2), (keys[1], self.tile_2)]

    bounds = Bounds(keys[0], keys[1])
    md = Metadata(bounds=bounds,
                  crs=self.md_proj,
                  cell_type=self.ct,
                  extent=self.extent,
                  layout_definition=self.ld)

    rdd = self.pysc.parallelize(key_layer)
    layer = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd, md)

    actual = layer.merge()

    self.assertEqual(actual.srdd.rdd().count(), 2)

    for k, v in actual.to_numpy_rdd().collect():
        self.assertTrue((v.cells == self.arr_2).all())
def read_layer_metadata(layer_type,
                        uri,
                        layer_name,
                        layer_zoom,
                        options=None,
                        **kwargs):
    """Reads the metadata from a saved layer without reading in the whole layer.

    Args:
        layer_type (str or :class:`geopyspark.geotrellis.constants.LayerType`): The spatial
            type of the geotiffs, represented either by a constant within ``LayerType`` or
            by a string.
        uri (str): The Uniform Resource Identifier used to point towards the desired
            GeoTrellis catalog to be read from. The shape of this string varies depending
            on backend.
        layer_name (str): The name of the GeoTrellis catalog to be read from.
        layer_zoom (int): The zoom level of the layer that is to be read.
        options (dict, optional): Additional parameters for reading the layer for specific
            backends. The dictionary is only used for ``Cassandra`` and ``HBase``; no other
            backend requires this to be set.
        **kwargs: The optional parameters can also be set as keyword arguments. The keywords
            must be in camel case. If both options and keywords are set, then the options
            will be used.

    Returns:
        :class:`~geopyspark.geotrellis.Metadata`
    """

    options = options or kwargs or {}

    _construct_catalog(get_spark_context(), uri, options)
    cached = _mapped_cached[uri]

    if layer_type == LayerType.SPATIAL:
        metadata = cached.store.metadataSpatial(layer_name, layer_zoom)
    else:
        metadata = cached.store.metadataSpaceTime(layer_name, layer_zoom)

    return Metadata.from_dict(json.loads(metadata))
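# A hedged usage sketch for read_layer_metadata; the catalog URI, layer name,
# and zoom level are placeholders rather than values taken from this codebase.
from geopyspark.geotrellis.constants import LayerType

md = read_layer_metadata(layer_type=LayerType.SPATIAL,
                         uri="file:///tmp/catalog",   # URI shape depends on the backend
                         layer_name="my-layer",       # hypothetical layer name
                         layer_zoom=11)

print(md.bounds)  # key bounds of the stored layer, obtained without loading its tiles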
def layer_metadata(self):
    """Layer metadata associated with this layer."""
    return Metadata.from_dict(json.loads(self.srdd.layerMetadata()))
def load_test_collection(
    collection_id: str,
    collection_metadata: GeopysparkCubeMetadata,
    extent,
    srs: str,
    from_date: str,
    to_date: str,
    bands=None,
    correlation_id: str = "NA",
) -> Dict[int, geopyspark.TiledRasterLayer]:
    """
    Load synthetic data as test collection

    :param collection_id:
    :param collection_metadata:
    :param extent:
    :param srs:
    :param from_date:
    :param to_date:
    :param bands:
    :param correlation_id:
    :return:
    """
    # TODO: support more test collections
    assert collection_id == "TestCollection-LonLat4x4"
    grid_size: float = 1.0
    tile_size = 4

    # TODO: support other srs'es?
    assert srs == "EPSG:4326"

    # Get bounds of tiling layout
    extent = geopyspark.Extent(extent.xmin(), extent.ymin(), extent.xmax(), extent.ymax())
    col_min = int(math.floor(extent.xmin / grid_size))
    row_min = int(math.floor(extent.ymin / grid_size))
    col_max = int(math.ceil(extent.xmax / grid_size) - 1)
    row_max = int(math.ceil(extent.ymax / grid_size) - 1)

    # Simulate sparse range of observation dates
    from_date = rfc3339.parse_datetime(rfc3339.datetime(from_date))
    to_date = rfc3339.parse_datetime(rfc3339.datetime(to_date))
    dates = dates_between(from_date, to_date)

    # Build RDD of tiles with requested bands.
    tile_builder = TestCollectionLonLat(tile_size=tile_size, grid_size=grid_size)
    bands = bands or [b.name for b in collection_metadata.bands]
    rdd_data = [(SpaceTimeKey(col, row, date),
                 tile_builder.get_tile(bands=bands, col=col, row=row, date=date))
                for col in range(col_min, col_max + 1)
                for row in range(row_min, row_max + 1)
                for date in dates]
    rdd = SparkContext.getOrCreate().parallelize(rdd_data)

    metadata = Metadata(
        bounds=Bounds(SpaceTimeKey(col_min, row_min, min(dates)),
                      SpaceTimeKey(col_max, row_max, max(dates))),
        crs="+proj=longlat +datum=WGS84 +no_defs ",
        cell_type=CellType.FLOAT64,
        extent=extent,
        layout_definition=LayoutDefinition(
            extent=geopyspark.Extent(col_min * grid_size, row_min * grid_size,
                                     (col_max + 1) * grid_size, (row_max + 1) * grid_size),
            tileLayout=TileLayout(layoutCols=col_max - col_min + 1,
                                  layoutRows=row_max - row_min + 1,
                                  tileCols=tile_size, tileRows=tile_size)))
    layer = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)
    return {0: layer}
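# A minimal sketch of calling load_test_collection, assuming a running Spark
# session and a GeopysparkCubeMetadata instance (`test_metadata`) built
# elsewhere; the band names, dates, and the extent stand-in are illustrative
# assumptions, not values from this codebase.
class _Box:
    """Stand-in for the Extent-like object expected here (callable accessors)."""
    def __init__(self, xmin, ymin, xmax, ymax):
        self._v = (xmin, ymin, xmax, ymax)
    def xmin(self): return self._v[0]
    def ymin(self): return self._v[1]
    def xmax(self): return self._v[2]
    def ymax(self): return self._v[3]

pyramid = load_test_collection(
    collection_id="TestCollection-LonLat4x4",
    collection_metadata=test_metadata,   # assumed: metadata for the test collection
    extent=_Box(0.0, 0.0, 4.0, 4.0),
    srs="EPSG:4326",
    from_date="2021-01-01",
    to_date="2021-01-10",
    bands=["Longitude", "Latitude"],     # assumed band names
)
layer = pyramid[0]  # the function returns a single zoom level, keyed as 0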