Example No. 1
def zfactor_calculator(mapped_zfactors):
    """Produces the Scala class, ``ZFactorCalculator`` as a ``JavaObject``.

    Unlike the ``ZFactorCalculator`` produced in
    :meth:`~geopyspark.geotrellis.zfactor_lat_lng_calculator`, this resulting
    ``ZFactorCalculator`` can be used on ``Tile``\s in a different projection. However,
    it cannot be used between different types of projections. For example, a
    ``ZFactorCalculator`` produced for a Layer that is in ``WebMercator`` will not
    create an accurate ``ZFactor`` for a Layer that is in ``LatLng``.

    Args:
        mapped_zfactors(dict): A ``dict`` that maps latitudes to ``ZFactor``\s.
           It is not required to supply a mapping for every latitude intersected
           in the layer. Rather, based on the latitudes given, a linear interpolation
           will be performed and any latitude not mapped will have its ``ZFactor``
           derived from that interpolation.

    Returns:
        ``py4j.JavaObject``
    """

    pysc = get_spark_context()
    string_map = {str(k): str(v) for k, v in mapped_zfactors.items()}
    calculator = pysc._gateway.jvm.geopyspark.geotrellis.\
            ZFactorCalculator.createZFactorCalculator(json.dumps(string_map))

    return calculator
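
A minimal usage sketch, assuming a sparse latitude-to-z-factor mapping (intermediate latitudes are interpolated per the docstring) and that the resulting ``JavaObject`` is handed to a slope-style operation:

# Hypothetical mapping from latitude to z-factor; the values are placeholders.
mapped = {0.0: 0.000898, 30.0: 0.001036, 60.0: 0.001792}
calculator = zfactor_calculator(mapped)
# tiled_layer.slope(calculator)  # assumed consumer of the calculator
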
Example No. 2
    def __init__(self, uri):
        self.uri = uri
        pysc = get_spark_context()
        try:
            self.wrapper = pysc._gateway.jvm.geopyspark.geotrellis.io.AttributeStoreWrapper(uri)
        except Py4JJavaError as err:
            raise ValueError(err.java_exception.getMessage())
Example No. 3
def rasterize(geoms,
              crs,
              zoom,
              fill_value,
              cell_type=CellType.FLOAT64,
              options=None,
              num_partitions=None):
    """Rasterizes a Shapely geometries.

    Args:
        geoms ([shapely.geometry]): List of shapely geometries to rasterize.
        crs (str or int): The CRS of the input geometry.
        zoom (int): The zoom level of the output raster.
        fill_value (int or float): Value to burn into pixels intersecting the geometry.
        cell_type (str or :class:`~geopyspark.geotrellis.constants.CellType`): Which data type the
            cells should be when created. Defaults to ``CellType.FLOAT64``.
        options (:class:`~geopyspark.geotrellis.RasterizerOptions`): Pixel intersection options.
        num_partitions (int, optional): The number of partitions to use when creating the
            resulting layer. Default is ``None``.

    Returns:
        :class:`~geopyspark.geotrellis.rdd.TiledRasterLayer`
    """

    if isinstance(crs, int):
        crs = str(crs)

    pysc = get_spark_context()
    wkb_geoms = [shapely.wkb.dumps(g) for g in geoms]
    srdd = pysc._gateway.jvm.geopyspark.geotrellis.SpatialTiledRasterLayer.rasterizeGeometry(
        pysc._jsc.sc(), wkb_geoms, crs, zoom, float(fill_value),
        CellType(cell_type).value, options, num_partitions)
    return TiledRasterLayer(LayerType.SPATIAL, srdd)
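
A brief usage sketch; the geometries, CRS, and zoom level below are arbitrary placeholders:

from shapely.geometry import box

# Rasterize two squares into a WebMercator (EPSG:3857) layer at zoom 11,
# burning 1 into every pixel that intersects a geometry.
geoms = [box(0, 0, 10000, 10000), box(20000, 20000, 30000, 30000)]
tiled_layer = rasterize(geoms, crs=3857, zoom=11, fill_value=1)
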
Example No. 4
    def __init__(self, uri, layer_name, zoom=None):

        self.layer_name = layer_name
        self.zoom = zoom
        pysc = get_spark_context()
        ValueReaderWrapper = pysc._gateway.jvm.geopyspark.geotrellis.io.ValueReaderWrapper
        self.wrapper = ValueReaderWrapper(uri)
Example No. 5
def from_dataframe(dataframe, target_extent=None):
    """Reads OSM data from a Spark ``DataFrame``. The resulting data will be read
    in as an instance of :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`.

    Args:
        dataframe (DataFrame): A Spark ``DataFrame`` that contains the OSM data.
        target_extent (:class:`~geopyspark.geotrellis.Extent` or ``shapely.geometry.Polygon``, optional): The
            area of interest. Only features inside this ``Extent`` will be returned. Default is ``None``. If
            ``None``, then all of the features will be returned.

    Returns:
        :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`
    """

    if target_extent:
        if isinstance(target_extent, Polygon):
            target_extent = Extent.from_polygon(target_extent)._asdict()
        else:
            target_extent = target_extent._asdict()

    pysc = get_spark_context()
    features = pysc._jvm.geopyspark.vectorpipe.io.OSMReader.fromDataFrame(
        dataframe._jdf, target_extent)

    return FeaturesCollection(features)
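
A usage sketch; the DataFrame source and the extent values are placeholders, and ``Extent`` is assumed to take ``(xmin, ymin, xmax, ymax)`` as elsewhere in these examples:

# Restrict the returned features to an area of interest.
aoi = Extent(4.0, 50.0, 6.0, 52.0)
osm_df = spark.read.orc("/data/osm/belgium.orc")  # any Spark DataFrame holding OSM rows
features = from_dataframe(osm_df, target_extent=aoi)
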
Example No. 6
    def from_dict(cls, value):
        """Creates a ``Histogram`` from a dictionary"""
        pysc = get_spark_context()
        histogram_json = json.dumps(value)
        scala_histogram = pysc._gateway.jvm.geopyspark.geotrellis.Json.readHistogram(
            histogram_json)
        return cls(scala_histogram)
Example No. 7
def test_create_params():
    pysc = gps.get_spark_context()
    gateway = JavaGateway(eager_load=True, gateway_parameters=pysc._gateway.gateway_parameters)
    jvm = gateway.jvm
    datacubeParams = jvm.org.openeo.geotrelliscommon.DataCubeParameters()
    datacubeParams.tileSize = 256
    assert datacubeParams.tileSize == 256
Example No. 8
def euclidean_distance(geometry, source_crs, zoom, cell_type=CellType.FLOAT64):
    """Calculates the Euclidean distance of a Shapely geometry.

    Args:
        geometry (shapely.geometry): The input geometry to compute the Euclidean distance
            for.
        source_crs (str or int): The CRS of the input geometry.
        zoom (int): The zoom level of the output raster.
        cell_type (str or :class:`~geopyspark.geotrellis.constants.CellType`, optional): The data
            type of the cells for the new layer. If not specified, then ``CellType.FLOAT64`` is used.

    Note:
        This function may run very slowly for polygonal inputs if they cover many cells of
        the output raster.

    Returns:
        :class:`~geopyspark.geotrellis.rdd.TiledRasterLayer`
    """

    if isinstance(source_crs, int):
        source_crs = str(source_crs)

    pysc = get_spark_context()

    srdd = pysc._gateway.jvm.geopyspark.geotrellis.SpatialTiledRasterLayer.euclideanDistance(
        pysc._jsc.sc(), shapely.wkb.dumps(geometry), source_crs,
        CellType(cell_type).value, zoom)
    return TiledRasterLayer(LayerType.SPATIAL, srdd)
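
A usage sketch; the point and zoom level are placeholders:

from shapely.geometry import Point

# Per-pixel distance to a point, computed in LatLng (EPSG:4326) at zoom 7.
distance_layer = euclidean_distance(Point(4.35, 50.85), source_crs=4326, zoom=7)
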
Example No. 9
    def __init__(self, server):
        self.pysc = get_spark_context()
        self.server = server
        self.bound = False
        self._host = None
        self._port = None
        self.pysc._gateway.start_callback_server()
Example No. 10
def get_layer_ids(uri,
                  options=None,
                  **kwargs):
    """Returns a list of all of the layer ids in the selected catalog as dicts that contain the
    name and zoom of a given layer.

    Args:
        uri (str): The Uniform Resource Identifier used to point towards the desired GeoTrellis
            catalog to be read from. The shape of this string varies depending on backend.
        options (dict, optional): Additional parameters for reading the layer for specific backends.
            The dictionary is only used for ``Cassandra`` and ``HBase``; no other backend requires this
            to be set.
        **kwargs: The optional parameters can also be set as keyword arguments. The keywords must
            be in camel case. If both options and keywords are set, then the options will be used.

    Returns:
        [layerIds]

        Where each element of ``layerIds`` is a ``dict`` with the following fields:
            - **name** (str): The name of the layer.
            - **zoom** (int): The zoom level of the given layer.
    """

    options = options or kwargs or {}

    _construct_catalog(get_spark_context(), uri, options)
    cached = _mapped_cached[uri]

    return list(cached.reader.layerIds())
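
A usage sketch against a hypothetical local catalog URI:

# Each element is a dict with "name" and "zoom" keys, per the docstring above.
layer_ids = get_layer_ids("file:///tmp/catalog")
names_at_zoom_12 = [lid["name"] for lid in layer_ids if lid["zoom"] == 12]
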
Example No. 11
    def from_histogram(cls,
                       histogram,
                       color_list,
                       no_data_color=0x00000000,
                       fallback=0x00000000,
                       classification_strategy=ClassificationStrategy.LESS_THAN_OR_EQUAL_TO):
        """Converts a wrapped GeoTrellis histogram into a ``ColorMap``.

        Args:
            histogram (:class:`~geopyspark.geotrellis.Histogram`): A ``Histogram`` instance;
                specifies breaks
            color_list ([int]): The colors corresponding to the values in the
                breaks list, represented as integers e.g., 0xff000080 is red
                at half opacity.
            no_data_color(int, optional): A color to replace NODATA values with
            fallback (int, optional): A color to replace cells that have no
                value in the mapping
            classification_strategy (str or :class:`~geopyspark.geotrellis.constants.ClassificationStrategy`, optional):
                A string giving the strategy for converting tile values to colors. e.g., if
                ``ClassificationStrategy.LESS_THAN_OR_EQUAL_TO`` is specified, and the break map is
                {3: 0xff0000ff, 4: 0x00ff00ff}, then values up to 3 map to red, values from above 3
                and up to and including 4 become green, and values over 4 become the fallback color.

        Returns:
            :class:`~geopyspark.geotrellis.color.ColorMap`
        """

        pysc = get_spark_context()

        fn = pysc._gateway.jvm.geopyspark.geotrellis.ColorMapUtils.fromHistogram
        strat = ClassificationStrategy(classification_strategy).value
        return cls(
            fn(histogram.scala_histogram, color_list, no_data_color, fallback,
               strat))
Example No. 12
    def __init__(self, layout, crs=None, extent=None, cellsize=None, dimensions=None):
        self.__jvm = gps.get_spark_context()._gateway.jvm

        if isinstance(layout, gps.LocalLayout):
            if not extent:
                raise ValueError("Must specify an extent when using LocalLayout")

            if dimensions and not cellsize:
                cellsize = ((extent.xmax - extent.xmin)/dimensions[0], (extent.ymax - extent.ymin)/dimensions[1])
                dimensions = None

            if cellsize and not dimensions:
                tilewidth = layout.tile_cols * cellsize[0]
                tileheight = layout.tile_rows * cellsize[1]
                # number of tile columns/rows needed to cover the extent
                cols = ceil((extent.xmax - extent.xmin) / tilewidth)
                rows = ceil((extent.ymax - extent.ymin) / tileheight)
                extent = gps.Extent(extent.xmin, extent.ymax - rows * tileheight, extent.xmin + cols * tilewidth, extent.ymax)
                tl = gps.TileLayout(cols, rows, layout.tile_cols, layout.tile_rows)
            else:
                raise ValueError("For LocalLayout, must specify exactly one: cellsize or dimension")
        elif isinstance(layout, gps.GlobalLayout):
            try:
                from pyproj import Proj, transform
            except ImportError:
                raise ImportError('pyproj is required for GlobalLayout')

            if not layout.zoom:
                raise ValueError("Must specify a zoom level when using GlobalLayout")

            if not crs:
                raise ValueError("Must specify a crs when using GlobalLayout")

            if isinstance(crs, int):
                crs = "{}".format(crs)

            gtcrs = self.__jvm.geopyspark.geotrellis.TileLayer.getCRS(crs).get()

            if gtcrs.epsgCode().isDefined() and gtcrs.epsgCode().get() == 3857:
                extent = WEB_MERCATOR
            elif gtcrs.epsgCode().isDefined() and gtcrs.epsgCode().get() == 4326:
                extent = LATLNG
            else:
                llex = LATLNG
                proj4str = gtcrs.toProj4String()
                target = Proj(proj4str)
                xmin, ymin = target(llex.xmin, llex.ymin)
                xmax, ymax = target(llex.xmax, llex.ymax)
                extent = gps.Extent(xmin, ymin, xmax, ymax)

            layout_rows_cols = int(pow(2, layout.zoom))
            tl = gps.TileLayout(layout_rows_cols, layout_rows_cols, layout.tile_size, layout.tile_size)
        elif isinstance(layout, gps.LayoutDefinition):
            extent = layout.extent
            tl = layout.tileLayout

        ex = self.__jvm.geotrellis.vector.Extent(float(extent.xmin), float(extent.ymin), float(extent.xmax), float(extent.ymax))
        tilelayout = self.__jvm.geotrellis.raster.TileLayout(int(tl[0]), int(tl[1]), int(tl[2]), int(tl[3]))
        self.layout = gps.LayoutDefinition(extent, tl)
        self.__layout = self.__jvm.geotrellis.spark.tiling.LayoutDefinition(ex, tilelayout)
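
A small worked example of the ``LocalLayout`` arithmetic above (pure Python, no Spark); the extent, tile dimensions, and cellsize are placeholders, and the namedtuple simply mirrors the ``gps.Extent`` fields:

from collections import namedtuple
from math import ceil

Extent = namedtuple("Extent", "xmin ymin xmax ymax")   # mirrors gps.Extent
extent = Extent(0.0, 0.0, 100.0, 80.0)                 # 100 x 80 map units
tile_cols, tile_rows = 25, 20                          # pixels per tile
cellsize = (1.0, 1.0)                                  # 1 map unit per pixel
tilewidth, tileheight = tile_cols * cellsize[0], tile_rows * cellsize[1]
cols = ceil((extent.xmax - extent.xmin) / tilewidth)   # 4 tile columns
rows = ceil((extent.ymax - extent.ymin) / tileheight)  # 4 tile rows
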
Example No. 13
def combine_bands(layers):
    """Combines the bands of values that share the same key in two or more ``TiledRasterLayer``\s.

    This method will concatenate the bands of two or more values with the same key. For example,
    ``layer a`` has values that have 2 bands and ``layer b`` has values with 1 band. When
    ``combine_bands`` is used on both of these layers, then the resulting layer will have
    values with 3 bands, 2 from ``layer a`` and 1 from ``layer b``.

    Note:
        All layers must have the same ``layer_type``. If the layers are ``TiledRasterLayer``\s,
        then all of the layers must also have the same :class:`~geopyspark.geotrellis.TileLayout`
        and ``CRS``.

    Args:
        layers ([:class:`~geopyspark.RasterLayer`] or [:class:`~geopyspark.TiledRasterLayer`] or (:class:`~geopyspark.RasterLayer`) or (:class:`~geopyspark.TiledRasterLayer`)): A
            collection of two or more ``RasterLayer``\s or ``TiledRasterLayer``\s. **The order of the
            layers determines the order in which the bands are concatenated**, with the bands being
            ordered based on the position of their respective layer.

            For example, the first layer in ``layers`` is ``layer a`` which contains 2 bands and
            the second layer is ``layer b`` whose values have 1 band. The resulting layer will
            have values with 3 bands: the first 2 are from ``layer a`` and the third from ``layer b``.
            If the positions of ``layer a`` and ``layer b`` are reversed, then the resulting values'
            first band will be from ``layer b`` and the last 2 will be from ``layer a``.

    Returns:
        :class:`~geopyspark.RasterLayer` or :class:`~geopyspark.TiledRasterLayer`
    """

    if len(layers) < 2:
        raise ValueError(
            "combine_bands can only be performed on 2 or more layers")

    base_layer = layers[0]
    base_layer_type = base_layer.layer_type

    check_layers(base_layer, base_layer_type, layers)

    pysc = get_spark_context()

    if isinstance(base_layer, RasterLayer):
        if base_layer_type == LayerType.SPATIAL:
            result = pysc._gateway.jvm.geopyspark.geotrellis.ProjectedRasterLayer.combineBands(
                pysc._jsc.sc(), [x.srdd for x in layers])
        else:
            result = pysc._gateway.jvm.geopyspark.geotrellis.TemporalRasterLayer.combineBands(
                pysc._jsc.sc(), [x.srdd for x in layers])

        return RasterLayer(base_layer_type, result)

    else:
        if base_layer_type == LayerType.SPATIAL:
            result = pysc._gateway.jvm.geopyspark.geotrellis.SpatialTiledRasterLayer.combineBands(
                pysc._jsc.sc(), [x.srdd for x in layers])
        else:
            result = pysc._gateway.jvm.geopyspark.geotrellis.TemporalTiledRasterLayer.combineBands(
                pysc._jsc.sc(), [x.srdd for x in layers])
        return TiledRasterLayer(base_layer_type, result)
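
A usage sketch; ``layer_a`` and ``layer_b`` are hypothetical ``TiledRasterLayer``\s that already share keys, ``TileLayout``, and CRS:

# layer_a contributes its 2 bands first, layer_b its single band last,
# so each value in the result has 3 bands.
combined = combine_bands([layer_a, layer_b])
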
Example No. 14
    def build(cls,
              breaks,
              colors=None,
              no_data_color=0x00000000,
              fallback=0x00000000,
              classification_strategy=ClassificationStrategy.LESS_THAN_OR_EQUAL_TO):
        """Given breaks and colors, build a ``ColorMap`` object.

        Args:
            breaks (dict or list or :class:`~geopyspark.geotrellis.Histogram`): If a ``dict`` then a
                mapping from tile values to colors, the latter represented as integers
                e.g., 0xff000080 is red at half opacity. If a ``list`` then tile values that
                specify breaks in the color mapping. If a ``Histogram`` then a histogram from which
                breaks can be derived.
            colors (str or list, optional):  If a ``str`` then the name of a matplotlib color ramp.
                If a ``list`` then either a list of colortools ``Color`` objects or a list
                of integers containing packed RGBA values. If ``None``, then the ``ColorMap`` will
                be created from the ``breaks`` given.
            no_data_color(int, optional): A color to replace NODATA values with
            fallback (int, optional): A color to replace cells that have no
                value in the mapping
            classification_strategy (str or :class:`~geopyspark.geotrellis.constants.ClassificationStrategy`, optional):
                A string giving the strategy for converting tile values to colors. e.g., if
                ``ClassificationStrategy.LESS_THAN_OR_EQUAL_TO`` is specified, and the break map is
                {3: 0xff0000ff, 4: 0x00ff00ff}, then values up to 3 map to red, values from above 3
                and up to and including 4 become green, and values over 4 become the fallback color.

        Returns:
            :class:`~geopyspark.geotrellis.color.ColorMap`
        """

        pysc = get_spark_context()

        if isinstance(breaks, dict):
            return ColorMap.from_break_map(breaks, no_data_color, fallback,
                                           classification_strategy)

        if isinstance(colors, str):
            color_list = get_colors_from_matplotlib(colors)
        elif isinstance(colors, list):
            if all(isinstance(c, int) for c in colors):
                color_list = colors
            else:
                color_list = get_colors_from_colors(colors)
        else:
            raise ValueError(
                "Could not construct ColorMap from the given colors", colors)

        if isinstance(breaks, list):
            return ColorMap.from_colors(breaks, color_list, no_data_color,
                                        fallback, classification_strategy)
        elif isinstance(breaks, Histogram):
            return ColorMap.from_histogram(breaks, color_list, no_data_color,
                                           fallback, classification_strategy)
        else:
            raise ValueError(
                "Could not construct ColorMap from the given breaks", breaks)
Example No. 15
def get(uri,
        extensions=['.shp', '.SHP'],
        num_partitions=None,
        s3_client=DEFAULT_S3_CLIENT):
    """Creates an ``RDD[Feature]`` from Shapefile(s) that are located on the local file system, ``HDFS``,
    or ``S3``.

    The ``properties`` of the ``Feature``\s in the ``RDD`` will contain the attributes of their
    respective geometry in a ``dict``. All keys and values of each ``dict`` will be ``str``\s regardless
    of how the attribute is represented in the Shapefile.

    Note:
        This feature is currently experimental and will most likely change in the coming versions of
        GPS.

    Note:
        When reading from S3, the desired files **must** be publicly readable. Otherwise, you will
        get 403 errors.

        Due to the nature of how GPS reads Shapefile(s) from S3, the ``mock`` S3 Client cannot
        currently be used.

    Args:
        uri (str or [str]): The path or list of paths to the desired Shapefile(s)/directory(ies).
        extensions ([str], optional): A list of the extensions that the Shapefile(s) have.
            These are ``.shp`` and ``.SHP`` by default.
        num_partitions (int, optional): The number of partitions Spark
            will make when the ``RDD`` is created. If ``None``, then the
            ``defaultParallelism`` will be used.
        s3_client (str, optional): Which ``S3Client`` to use when reading
            the Shapefile(s) from S3. There are currently two options: ``default`` and
            ``mock``. Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_S3_CLIENT`.

            Note:
                ``mock`` should only be used in unit tests and debugging.

    Returns:
        ``RDD[:class:`~geopyspark.geotrellis.Feature`]``
    """

    pysc = get_spark_context()

    num_partitions = num_partitions or pysc.defaultParallelism

    shapefile = pysc._gateway.jvm.geopyspark.geotools.shapefile.ShapefileRDD

    if isinstance(uri, (list, tuple)):
        jrdd = shapefile.get(pysc._jsc.sc(), uri, extensions, num_partitions,
                             s3_client)
    else:
        jrdd = shapefile.get(pysc._jsc.sc(), [uri], extensions, num_partitions,
                             s3_client)

    ser = ProtoBufSerializer(feature_decoder, None)

    return create_python_rdd(jrdd, ser)
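
A usage sketch with a hypothetical HDFS directory of Shapefiles:

# Each Feature's properties dict carries the attributes as strings,
# per the docstring above.
features_rdd = get("hdfs:///data/parcels/")
first_props = features_rdd.map(lambda f: f.properties).first()
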
Example No. 16
    def write_assets(self, directory: str) -> Dict:
        """
        Save generated assets into a directory, return asset metadata.

        :return: STAC assets dictionary: https://github.com/radiantearth/stac-spec/blob/master/item-spec/item-spec.md#assets
        """
        directory = pathlib.Path(directory).parent
        filename = str(pathlib.Path(directory) / "mlmodel.model")
        self._model.save(gps.get_spark_context(), filename)
        return {filename: {"href": filename}}
Example No. 17
    def to_dict(self):
        """Encodes histogram as a dictionary

        Returns:
           ``dict``
        """

        pysc = get_spark_context()
        histogram_json = pysc._gateway.jvm.geopyspark.geotrellis.Json.writeHistogram(
            self.scala_histogram)
        return json.loads(histogram_json)
Example No. 18
    def __init__(self, uri, layer_name, zoom=None, store=None):
        if store:
            self.store = AttributeStore.build(store)
        else:
            self.store = AttributeStore.cached(uri)

        self.layer_name = layer_name
        self.zoom = zoom
        pysc = get_spark_context()
        scala_store = self.store.wrapper.attributeStore()
        ValueReaderWrapper = pysc._gateway.jvm.geopyspark.geotrellis.io.ValueReaderWrapper
        self.wrapper = ValueReaderWrapper(scala_store, uri)
Example No. 19
    def load_disk_data(self, format: str, glob_pattern: str, options: dict,
                       viewing_parameters: dict) -> object:
        if format != 'GTiff':
            raise NotImplementedError(
                "The format is not supported by the backend: " + format)

        date_regex = options['date_regex']

        if glob_pattern.startswith("hdfs:"):
            kerberos()

        from_date = normalize_date(viewing_parameters.get("from", None))
        to_date = normalize_date(viewing_parameters.get("to", None))

        left = viewing_parameters.get("left", None)
        right = viewing_parameters.get("right", None)
        top = viewing_parameters.get("top", None)
        bottom = viewing_parameters.get("bottom", None)
        srs = viewing_parameters.get("srs", None)
        band_indices = viewing_parameters.get("bands")

        sc = gps.get_spark_context()

        gateway = JavaGateway(
            eager_load=True, gateway_parameters=sc._gateway.gateway_parameters)
        jvm = gateway.jvm

        extent = jvm.geotrellis.vector.Extent(float(left), float(bottom), float(right), float(top)) \
            if left is not None and right is not None and top is not None and bottom is not None else None

        pyramid = jvm.org.openeo.geotrellis.geotiff.PyramidFactory.from_disk(glob_pattern, date_regex) \
            .pyramid_seq(extent, srs, from_date, to_date)

        temporal_tiled_raster_layer = jvm.geopyspark.geotrellis.TemporalTiledRasterLayer
        option = jvm.scala.Option
        levels = {
            pyramid.apply(index)._1(): TiledRasterLayer(
                LayerType.SPACETIME,
                temporal_tiled_raster_layer(
                    option.apply(pyramid.apply(index)._1()),
                    pyramid.apply(index)._2()))
            for index in range(0, pyramid.size())
        }

        image_collection = GeotrellisTimeSeriesImageCollection(
            pyramid=gps.Pyramid(levels),
            service_registry=self._service_registry,
            metadata={})

        return image_collection.band_filter(
            band_indices) if band_indices else image_collection
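
A sketch of the inputs this method expects, with keys taken from the code above and placeholder values; ``backend`` stands in for an instance of the class this method belongs to:

options = {'date_regex': r'.*_(\d{8})\.tif'}
viewing_parameters = {
    'from': '2021-01-01', 'to': '2021-02-01',
    'left': 5.0, 'right': 6.0, 'top': 52.0, 'bottom': 51.0,
    'srs': 'EPSG:4326',
    'bands': [0],
}
cube = backend.load_disk_data('GTiff', '/data/tiles/*.tif', options, viewing_parameters)
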
Example No. 20
def from_orc(source, target_extent=None):
    """Reads in OSM data from an orc file that is located either locally or on S3. The
    resulting data will be read in as an instance of :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`.

    Args:
        source (str): The path or URI to the ORC file to be read. Can either be a local file, or
            a file on S3.

            Note:
                Reading a file from S3 requires additional setup depending on the environment
                and how the file is being read.

                The following describes the parameters that need to be set depending on
                how the files are to be read in. However, **if reading a file on EMR, then
                the access key and secret key do not need to be set**.

                If using ``s3a://``, then the following ``SparkConf`` parameters need to be set:
                    - ``spark.hadoop.fs.s3a.impl``
                    - ``spark.hadoop.fs.s3a.access.key``
                    - ``spark.hadoop.fs.s3a.secret.key``

                If using ``s3n://``, then the following ``SparkConf`` parameters need to be set:
                    - ``spark.hadoop.fs.s3n.access.key``
                    - ``spark.hadoop.fs.s3n.secret.key``

                An alternative to passing in your S3 credentials to ``SparkConf`` would be
                to export them as environment variables:
                    - ``AWS_ACCESS_KEY_ID=YOUR_KEY``
                    - ``AWS_SECRET_ACCESS_KEY=YOUR_SECRET_KEY``
        target_extent (:class:`~geopyspark.geotrellis.Extent` or ``shapely.geometry.Polygon``, optional): The
            area of interest. Only features inside this ``Extent`` will be returned. Default is ``None``. If
            ``None``, then all of the features will be returned.

    Returns:
        :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`
    """

    if target_extent:
        if isinstance(target_extent, Polygon):
            target_extent = Extent.from_polygon(target_extent)._asdict()
        else:
            target_extent = target_extent._asdict()

    pysc = get_spark_context()
    session = SparkSession.builder.config(
        conf=pysc.getConf()).enableHiveSupport().getOrCreate()
    features = pysc._jvm.geopyspark.vectorpipe.io.OSMReader.fromORC(
        session._jsparkSession, source, target_extent)

    return FeaturesCollection(features)
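
A sketch of reading over ``s3a://``; the bucket, the credentials, and the use of ``geopyspark_conf`` to build the ``SparkConf`` are assumptions, and the keys are the ones listed in the docstring:

import geopyspark as gps
from pyspark import SparkContext

conf = gps.geopyspark_conf(appName="osm-orc")
conf.set("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
conf.set("spark.hadoop.fs.s3a.access.key", "YOUR_KEY")
conf.set("spark.hadoop.fs.s3a.secret.key", "YOUR_SECRET_KEY")
SparkContext(conf=conf)  # must be created before from_orc() grabs the context
features = from_orc("s3a://my-bucket/planet.orc")
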
Example No. 21
def from_dataframe(dataframe):
    """Reads OSM data from a Spark ``DataFrame``. The resulting data will be read
    in as an instance of :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`.

    Args:
        dataframe (DataFrame): A Spark ``DataFrame`` that contains the OSM data.

    Returns:
        :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`
    """

    pysc = get_spark_context()
    features = pysc._jvm.geopyspark.vectorpipe.io.OSMReader.fromDataFrame(
        dataframe._jdf)

    return FeaturesCollection(features)
Example No. 22
def crs_to_proj4(crs):
    """Converts a given CRS to a Proj4 string.

    Args:
        crs (str or int): The CRS to be converted. Either an EPSG code, well-known name, or a
            PROJ.4 string.

    Returns:
        str
    """

    if not isinstance(crs, str):
        crs = str(crs)

    pysc = get_spark_context()
    scala_crs = pysc._gateway.jvm.geopyspark.geotrellis.TileLayer.getCRS(crs).get()

    return scala_crs.toProj4String()
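
A usage sketch showing both accepted input forms:

# Both an integer EPSG code and a string should resolve to a PROJ.4 string.
proj4_latlng = crs_to_proj4(4326)          # e.g. "+proj=longlat +datum=WGS84 ..."
proj4_webmercator = crs_to_proj4("EPSG:3857")
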
Example No. 23
def set_s3_credentials(credentials, uri_type):
    """Temporarily updates the session's Amazon S3 credentials for the
       duration of the context.

    Args:
        credentials (Credentials): The access and secret keys used to access
            Amazon S3 resources.
        uri_type (str): The URI type. 's3', 's3a', or 's3n'.
    """
    if credentials:
        if uri_type not in _S3_URI_PREFIXES:
            raise RuntimeError(
                'Cannot set S3 credentials for unrecognized URI type '
                '{}'.format(uri_type))
        configuration = get_spark_context()._conf
        with _set_s3_credentials(credentials, configuration, uri_type):
            yield
    else:
        yield
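
A usage sketch, assuming the function is decorated with ``contextlib.contextmanager`` (the decorator is not shown in this snippet) and that ``Credentials`` holds an access key and a secret key (hypothetical field names):

creds = Credentials(access_key="YOUR_KEY", secret_key="YOUR_SECRET_KEY")
with set_s3_credentials(creds, 's3a'):
    # S3 reads performed inside the block use the temporary credentials.
    pass
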
Example No. 24
def from_orc(source):
    """Reads in OSM data from an orc file that is located either locally or on S3. The
    resulting data will be read in as an instance of :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`.

    Args:
        source (str): The path or URI to the ORC file to be read. Can either be a local file, or
            a file on S3.

            Note:
                Reading a file from S3 requires additional setup depending on the environment
                and how the file is being read.

                The following describes the parameters that need to be set depending on
                how the files are to be read in. However, **if reading a file on EMR, then
                the access key and secret key do not need to be set**.

                If using ``s3a://``, then the following ``SparkConf`` parameters need to be set:
                    - ``spark.hadoop.fs.s3a.impl``
                    - ``spark.hadoop.fs.s3a.access.key``
                    - ``spark.hadoop.fs.s3a.secret.key``

                If using ``s3n://``, then the following ``SparkConf`` parameters need to be set:
                    - ``spark.hadoop.fs.s3n.access.key``
                    - ``spark.hadoop.fs.s3n.secret.key``

                An alternative to passing in your S3 credentials to ``SparkConf`` would be
                to export them as environment variables:
                    - ``AWS_ACCESS_KEY_ID=YOUR_KEY``
                    - ``AWS_SECRET_ACCESS_KEY=YOUR_SECRET_KEY``

    Returns:
        :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`
    """

    pysc = get_spark_context()
    session = SparkSession.builder.config(
        conf=pysc.getConf()).enableHiveSupport().getOrCreate()
    features = pysc._jvm.geopyspark.vectorpipe.io.OSMReader.fromORC(
        session._jsparkSession, source)

    return FeaturesCollection(features)
Example No. 25
def zfactor_lat_lng_calculator(unit):
    """Produces the Scala class, ``ZFactorCalculator`` as a ``JavaObject``.

    The resulting ``ZFactorCalculator`` produced using this method assumes that
    the ``Tile``\s it will be deriving ``zfactor``\s from are in ``LatLng``
    (aka ``epsg:4326``). This calculator can still be used on ``Tile``\s with
    different projections, however, the resulting ``Slope`` calculations may
    be off.

    Args:
        unit (str or :class:`~geopyspark.geotrellis.constants.Unit`): The unit of elevation
            in the target layer.

    Returns:
        ``py4j.JavaObject``
    """

    pysc = get_spark_context()
    calculator = pysc._gateway.jvm.geopyspark.geotrellis.\
            ZFactorCalculator.createLatLngZFactorCalculator(Unit(unit).value)

    return calculator
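
A usage sketch; the ``Unit`` member name is an assumption, and a plain string should also be accepted per the docstring:

# Calculator for a LatLng layer whose elevation values are in meters.
calculator = zfactor_lat_lng_calculator(Unit.METERS)
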
Example No. 26
def get(data_source,
        xcols=DEFAULT_MAX_TILE_SIZE,
        ycols=DEFAULT_MAX_TILE_SIZE,
        bands=None,
        crs_to_proj4=crs_to_proj4):
    """Creates an ``RDD`` of windows represented as the key value pair: ``(ProjectedExtent, Tile)``
    from URIs using rasterio.

    Args:
        data_source (str or [str] or RDD): The source of the data to be windowed.
            Can either be URI or list of URIs which point to where the source data can be found;
            or it can be an ``RDD`` that contains the URIs.
        xcols (int, optional): The desired tile width. If the size is smaller than
            the width of the read in tile, then that tile will be broken into smaller sections
            of the given size. Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_MAX_TILE_SIZE`.
        ycols (int, optional): The desired tile height. If the size is smaller than
            the height of the read in tile, then that tile will be broken into smaller sections
            of the given size. Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_MAX_TILE_SIZE`.
        bands ([int], optional): The bands from which windows should be produced given as a list
            of ``int``\s. Defaults to ``None`` which causes all bands to be read.
        crs_to_proj4 (``rasterio.crs.CRS`` => str, optional): A function that takes a :class:`rasterio.crs.CRS`
            and returns a Proj4 string. Default is :func:`geopyspark.geotrellis.rasterio.crs_to_proj4`.

    Returns:
        RDD
    """

    pysc = gps.get_spark_context()

    if isinstance(data_source, (list, str)):
        if isinstance(data_source, str):
            data_source = [data_source]

        return pysc.\
                parallelize(data_source, len(data_source)).\
                flatMap(lambda ds: _read_windows(ds, xcols, ycols, bands, crs_to_proj4))
    else:
        return data_source.flatMap(
            lambda ds: _read_windows(ds, xcols, ycols, bands, crs_to_proj4))
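
A usage sketch; the URIs are placeholders and the ``from_numpy_rdd`` step is an assumed way to lift the windows into a layer:

# Window two GeoTiffs into (ProjectedExtent, Tile) pairs.
uris = ["/data/scene_1.tif", "/data/scene_2.tif"]
windows = get(uris, xcols=512, ycols=512)
raster_layer = gps.RasterLayer.from_numpy_rdd(gps.LayerType.SPATIAL, windows)
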
Example No. 27
    def from_colors(cls,
                    breaks,
                    color_list,
                    no_data_color=0x00000000,
                    fallback=0x00000000,
                    classification_strategy=ClassificationStrategy.LESS_THAN_OR_EQUAL_TO):
        """Converts lists of values and colors to a ``ColorMap``.

        Args:
            breaks (list): The tile values that specify breaks in the color
                mapping.
            color_list ([int]): The colors corresponding to the values in the
                breaks list, represented as integers---e.g., 0xff000080 is red
                at half opacity.
            no_data_color(int, optional): A color to replace NODATA values with
            fallback (int, optional): A color to replace cells that have no
                value in the mapping
            classification_strategy (str or :class:`~geopyspark.geotrellis.constants.ClassificationStrategy`, optional):
                A string giving the strategy for converting tile values to colors. e.g., if
                ``ClassificationStrategy.LESS_THAN_OR_EQUAL_TO`` is specified, and the break map is
                {3: 0xff0000ff, 4: 0x00ff00ff}, then values up to 3 map to red, values from above 3
                and up to and including 4 become green, and values over 4 become the fallback color.

        Returns:
            :class:`~geopyspark.geotrellis.color.ColorMap`
        """

        pysc = get_spark_context()

        if all(isinstance(x, int) for x in breaks):
            fn = pysc._gateway.jvm.geopyspark.geotrellis.ColorMapUtils.fromBreaks
            strat = ClassificationStrategy(classification_strategy).value
            return cls(fn(breaks, color_list, no_data_color, fallback, strat))
        else:
            fn = pysc._gateway.jvm.geopyspark.geotrellis.ColorMapUtils.fromBreaksDouble
            arr = [float(br) for br in breaks]
            strat = ClassificationStrategy(classification_strategy).value
            return cls(fn(arr, color_list, no_data_color, fallback, strat))
Example No. 28
def kerberos():
    import geopyspark as gps

    if 'HADOOP_CONF_DIR' not in os.environ:
        logger.warning(
            'HADOOP_CONF_DIR is not set. Kerberos based authentication will probably not be set up correctly.'
        )

    sc = gps.get_spark_context()
    gateway = JavaGateway(gateway_parameters=sc._gateway.gateway_parameters)
    jvm = gateway.jvm

    hadoop_auth = jvm.org.apache.hadoop.conf.Configuration().get(
        'hadoop.security.authentication')
    if hadoop_auth != 'kerberos':
        logger.warning(
            'Hadoop client does not have hadoop.security.authentication=kerberos.'
        )

    currentUser = jvm.org.apache.hadoop.security.UserGroupInformation.getCurrentUser()
    if currentUser.hasKerberosCredentials():
        return
    logger.info("Kerberos currentUser={u!r} isSecurityEnabled={s!r}".format(
        u=currentUser.toString(),
        s=jvm.org.apache.hadoop.security.UserGroupInformation.
        isSecurityEnabled()))
    # print(jvm.org.apache.hadoop.security.UserGroupInformation.getCurrentUser().getAuthenticationMethod().toString())

    principal = sc.getConf().get("spark.yarn.principal")
    sparkKeytab = sc.getConf().get("spark.yarn.keytab")
    if principal is not None and sparkKeytab is not None:
        jvm.org.apache.hadoop.security.UserGroupInformation.loginUserFromKeytab(
            principal, sparkKeytab)
        jvm.org.apache.hadoop.security.UserGroupInformation.getCurrentUser().setAuthenticationMethod(
            jvm.org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod.KERBEROS)
Example No. 29
def read_layer_metadata(layer_type,
                        uri,
                        layer_name,
                        layer_zoom,
                        options=None,
                        **kwargs):
    """Reads the metadata from a saved layer without reading in the whole layer.

    Args:
        layer_type (str or :class:`geopyspark.geotrellis.constants.LayerType`): What the spatial type
            of the geotiffs is. This is represented by either constants within ``LayerType`` or by
            a string.
        uri (str): The Uniform Resource Identifier used to point towards the desired GeoTrellis
            catalog to be read from. The shape of this string varies depending on backend.
        layer_name (str): The name of the GeoTrellis catalog to be read from.
        layer_zoom (int): The zoom level of the layer that is to be read.
        options (dict, optional): Additional parameters for reading the layer for specific backends.
            The dictionary is only used for ``Cassandra`` and ``HBase``; no other backend requires
            this to be set.
        **kwargs: The optional parameters can also be set as keyword arguments. The keywords must
            be in camel case. If both options and keywords are set, then the options will be used.

    Returns:
        :class:`~geopyspark.geotrellis.Metadata`
    """

    options = options or kwargs or {}

    _construct_catalog(get_spark_context(), uri, options)
    cached = _mapped_cached[uri]

    if layer_type == LayerType.SPATIAL:
        metadata = cached.store.metadataSpatial(layer_name, layer_zoom)
    else:
        metadata = cached.store.metadataSpaceTime(layer_name, layer_zoom)

    return Metadata.from_dict(json.loads(metadata))
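
A usage sketch against a hypothetical catalog and layer:

# Reads only the metadata; no tiles are loaded.
meta = read_layer_metadata(LayerType.SPATIAL, "file:///tmp/catalog", "ndvi", 12)
# Fields such as the extent and CRS are assumed to be available on Metadata.
print(meta.extent, meta.crs)
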
Example No. 30
    def from_break_map(cls,
                       break_map,
                       no_data_color=0x00000000,
                       fallback=0x00000000,
                       classification_strategy=ClassificationStrategy.LESS_THAN_OR_EQUAL_TO):
        """Converts a dictionary mapping from tile values to colors to a ColorMap.

        Args:
            break_map (dict): A mapping from tile values to colors, the latter
                represented as integers e.g., 0xff000080 is red at half opacity.
            no_data_color(int, optional): A color to replace NODATA values with
            fallback (int, optional): A color to replace cells that have no
                value in the mapping
            classification_strategy (str or :class:`~geopyspark.geotrellis.constants.ClassificationStrategy`, optional):
                A string giving the strategy for converting tile values to colors. e.g., if
                ``ClassificationStrategy.LESS_THAN_OR_EQUAL_TO`` is specified, and the break map is
                {3: 0xff0000ff, 4: 0x00ff00ff}, then values up to 3 map to red, values from above 3
                and up to and including 4 become green, and values over 4 become the fallback color.

        Returns:
            :class:`~geopyspark.geotrellis.color.ColorMap`
        """

        pysc = get_spark_context()

        if all(isinstance(x, int) for x in break_map.keys()):
            fn = pysc._gateway.jvm.geopyspark.geotrellis.ColorMapUtils.fromMap
            strat = ClassificationStrategy(classification_strategy).value
            return cls(fn(break_map, no_data_color, fallback, strat))
        elif all(isinstance(x, float) for x in break_map.keys()):
            fn = pysc._gateway.jvm.geopyspark.geotrellis.ColorMapUtils.fromMapDouble
            strat = ClassificationStrategy(classification_strategy).value
            return cls(fn(break_map, no_data_color, fallback, strat))
        else:
            raise TypeError("Break map keys must be either int or float.")