Example #1
def query(uri,
          layer_name,
          layer_zoom=None,
          query_geom=None,
          time_intervals=None,
          query_proj=None,
          num_partitions=None):
    """Queries a single, zoom layer from a GeoTrellis catalog given spatial and/or time parameters.

    Note:
        The whole layer could still be read in if ``query_geom`` and/or ``time_intervals`` have not
        been set, or if the queried region contains the entire layer.

    Args:
        uri (str): The Uniform Resource Identifier used to point towards the desired GeoTrellis
            catalog to be read from. The shape of this string varies depending on backend.
        layer_name (str): The name of the GeoTrellis catalog to be queried.
        layer_zoom (int, optional): The zoom level of the layer that is to be queried.
            If ``None``, then the ``layer_zoom`` will be set to 0.
        query_geom (bytes or shapely.geometry or :class:`~geopyspark.geotrellis.Extent`, optional):
            The desired spatial area to be returned. Can either be a shapely geometry, an
            instance of ``Extent``, or the WKB ``bytes`` of a geometry.

            Note:
                Not all shapely geometries are supported. The following types are supported:
                * Point
                * Polygon
                * MultiPolygon

            Note:
                Only layers that were made from spatial, singleband GeoTiffs can query a ``Point``.
                All other types are restricted to ``Polygon`` and ``MultiPolygon``.

            Note:
                If the queried region does not intersect the layer, then an empty layer will be
                returned.

            If not specified, then the entire layer will be read.
        time_intervals (``[datetime.datetime]``, optional): A list of the time intervals to query.
            This parameter is only used when querying spatial-temporal data. The default value is
            ``None``. If ``None``, then only the spatial area will be queried.
        query_proj (int or str, optional): The CRS of the queried geometry if it is different
            from that of the layer it is being filtered against. If they are different and this is
            not set, then the returned ``TiledRasterLayer`` could contain incorrect values. If
            ``None``, then the geometry and layer are assumed to be in the same projection.
        num_partitions (int, optional): Sets RDD partition count when reading from catalog.

    Returns:
        :class:`~geopyspark.geotrellis.layer.TiledRasterLayer`
    """

    pysc = get_spark_context()
    layer_zoom = layer_zoom or 0

    if query_geom is None:
        pass  # pass as Null to Java
    elif isinstance(query_geom, Extent):
        query_geom = query_geom.to_polygon
        query_geom = shapely.wkb.dumps(query_geom)
    elif isinstance(query_geom, (Polygon, MultiPolygon, Point)):
        query_geom = shapely.wkb.dumps(query_geom)
    elif isinstance(query_geom, bytes):
        pass  # assume bytes are WKB
    else:
        raise TypeError("Could not query intersection", query_geom)

    if isinstance(query_proj, int):
        query_proj = str(query_proj)

    if time_intervals:
        for x, time in enumerate(time_intervals):
            if time.tzinfo:
                time_intervals[x] = time.astimezone(pytz.utc).isoformat()
            else:
                time_intervals[x] = time.replace(tzinfo=pytz.utc).isoformat()
    else:
        time_intervals = []

    reader = pysc._gateway.jvm.geopyspark.geotrellis.io.LayerReaderWrapper(pysc._jsc.sc())
    srdd = reader.query(uri,
                        layer_name, layer_zoom,
                        query_geom, time_intervals, query_proj,
                        num_partitions)

    layer_type = LayerType._from_key_name(srdd.keyClassName())

    return TiledRasterLayer(layer_type, srdd)
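A minimal usage sketch for the ``query`` defined above. The Spark setup, catalog URI, layer name, and extent below are illustrative assumptions rather than values taken from the original source.

from pyspark import SparkContext
from geopyspark import geopyspark_conf
from geopyspark.geotrellis import Extent

# Assumes a local Spark master; geopyspark_conf builds a SparkConf with the
# geopyspark jar on the driver classpath.
sc = SparkContext(conf=geopyspark_conf(master="local[*]", appName="query-example"))

# Hypothetical catalog URI, layer name, and bounding box (in the layer's CRS).
area = Extent(xmin=-87.8, ymin=41.6, xmax=-87.5, ymax=42.0)
tiled_layer = query("file:///tmp/catalog",
                    layer_name="example-layer",
                    layer_zoom=11,
                    query_geom=area)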
Example #2
def get(layer_type,
        uri,
        crs=None,
        max_tile_size=DEFAULT_MAX_TILE_SIZE,
        num_partitions=None,
        chunk_size=DEFAULT_CHUNK_SIZE,
        partition_bytes=DEFAULT_PARTITION_BYTES,
        time_tag=DEFAULT_GEOTIFF_TIME_TAG,
        time_format=DEFAULT_GEOTIFF_TIME_FORMAT,
        delimiter=None,
        s3_client=DEFAULT_S3_CLIENT,
        s3_credentials=None):
    """Creates a ``RasterLayer`` from GeoTiffs that are located on the local file system, ``HDFS``,
    or ``S3``.

    Args:
        layer_type (str or :class:`~geopyspark.geotrellis.constants.LayerType`): What the layer type
            of the geotiffs is. This is represented by either constants within ``LayerType`` or by
            a string.

            Note:
                All of the GeoTiffs must have the same spatial type.

        uri (str or [str]): The path or list of paths to the desired tile(s)/directory(ies).
        crs (str or int, optional): The CRS that the output tiles should be
            in. If ``None``, then the CRS that the tiles were originally in
            will be used.
        max_tile_size (int or None, optional): The max size of each tile in the
            resulting Layer. If the size is smaller than a tile that is read in,
            then that tile will be broken into smaller sections of the given
            size. Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_MAX_TILE_SIZE`.
            If ``None``, then the whole tile will be read in.
        num_partitions (int, optional): The number of partitions Spark
            will make when the data is repartitioned. If ``None``, then the
            data will not be repartitioned.

            Note:
                If ``max_tile_size`` is also specified then this parameter
                will be ignored.

        partition_bytes (int, optional): The desired number of bytes per
            partition. This will ensure that at least one item is assigned to
            each partition. Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_PARTITION_BYTES`.
        chunk_size (int, optional): How many bytes of the file should be
            read in at a time. Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_CHUNK_SIZE`.
        time_tag (str, optional): The name of the tiff tag that contains
            the time stamp for the tile.
            Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_GEOTIFF_TIME_TAG`.
        time_format (str, optional): The pattern of the time stamp to be parsed.
            Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_GEOTIFF_TIME_FORMAT`.
        delimiter (str, optional): The delimiter to use for S3 object listings.

            Note:
                This parameter will only be used when reading from S3.

        s3_client (str, optional): Which ``S3Client`` to use when reading
            GeoTiffs from S3. There are currently two options: ``default`` and
            ``mock``. Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_S3_CLIENT`.

            Note:
                ``mock`` should only be used in unit tests and debugging.

        s3_credentials (:class:`~geopyspark.geotrellis.s3.Credentials`, optional): Alternative Amazon S3
            credentials to use when accessing the tile(s).

    Returns:
        :class:`~geopyspark.geotrellis.layer.RasterLayer`

    Raises:
        RuntimeError: ``s3_credentials`` were specified but the specified ``uri`` was not S3-based.
    """
    inputs = {k: v for k, v in locals().items() if v is not None}

    pysc = get_spark_context()
    geotiff_rdd = pysc._gateway.jvm.geopyspark.geotrellis.io.geotiff.GeoTiffRDD

    key = LayerType(inputs.pop('layer_type'))._key_name(False)
    partition_bytes = str(inputs.pop('partition_bytes'))

    uri = inputs.pop('uri')
    uris = (uri if isinstance(uri, list) else [uri])

    try:
        s3_credentials = inputs.pop('s3_credentials')
    except KeyError:
        s3_credentials = None
    else:
        _validate_s3_credentials(uri, s3_credentials)

    uri_type = uris[0].split(":")[0]  # scheme of the (first) URI, e.g. 'file', 'hdfs', or 's3'

    with set_s3_credentials(s3_credentials, uri_type):
        srdd = geotiff_rdd.get(pysc._jsc.sc(), key, uris, inputs,
                               partition_bytes)

    return RasterLayer(layer_type, srdd)
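A usage sketch for the ``get`` defined above, reading a hypothetical list of GeoTiffs from S3. A running SparkContext is assumed (as in the earlier sketch), the bucket and object names are placeholders, and the optional ``s3_credentials`` argument is omitted.

from geopyspark.geotrellis.constants import LayerType

# Hypothetical S3 objects; a single path or a list of paths is accepted.
uris = ["s3://example-bucket/imagery/scene-1.tif",
        "s3://example-bucket/imagery/scene-2.tif"]

raster_layer = get(LayerType.SPATIAL,   # all of the GeoTiffs share the same spatial type
                   uris,
                   max_tile_size=512)   # break large rasters into 512x512 sections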
Example #3
def query(layer_type,
          uri,
          layer_name,
          layer_zoom=None,
          query_geom=None,
          time_intervals=None,
          query_proj=None,
          options=None,
          num_partitions=None,
          **kwargs):
    """Queries a single, zoom layer from a GeoTrellis catalog given spatial and/or time parameters.
    Unlike read, this method will only return part of the layer that intersects the specified
    region.

    Note:
        The whole layer could still be read in if ``query_geom`` and/or ``time_intervals`` have not
        been set, or if the queried region contains the entire layer.

    Args:
        layer_type (str or :class:`geopyspark.geotrellis.constants.LayerType`): What the spatial type
            of the geotiffs is. This is represented by either constants within ``LayerType`` or by
            a string.
        uri (str): The Uniform Resource Identifier used to point towards the desired GeoTrellis
            catalog to be read from. The shape of this string varies depending on backend.
        layer_name (str): The name of the GeoTrellis catalog to be queried.
        layer_zoom (int, optional): The zoom level of the layer that is to be queried.
            If ``None``, then the ``layer_zoom`` will be set to 0.
        query_geom (bytes or shapely.geometry or :class:`~geopyspark.geotrellis.Extent`, optional):
            The desired spatial area to be returned. Can either be a shapely geometry, an
            instance of ``Extent``, or the WKB ``bytes`` of a geometry.

            Note:
                Not all shapely geometries are supported. The following types are supported:
                * Point
                * Polygon
                * MultiPolygon

            Note:
                Only layers that were made from spatial, singleband GeoTiffs can query a ``Point``.
                All other types are restricted to ``Polygon`` and ``MultiPolygon``.

            If not specified, then the entire layer will be read.
        time_intervals (``[datetime.datetime]``, optional): A list of the time intervals to query.
            This parameter is only used when querying spatial-temporal data. The default value is
            ``None``. If ``None``, then only the spatial area will be queried.
        query_proj (int or str, optional): The CRS of the queried geometry if it is different
            from that of the layer it is being filtered against. If they are different and this is
            not set, then the returned ``TiledRasterLayer`` could contain incorrect values. If
            ``None``, then the geometry and layer are assumed to be in the same projection.
        options (dict, optional): Additional parameters for querying the tile for specific backends.
            The dictionary is only used for ``Cassandra`` and ``HBase``; no other backend requires
            this to be set.
        num_partitions (int, optional): Sets RDD partition count when reading from catalog.
        **kwargs: The optional parameters can also be set as keyword arguments. The keywords must
            be in camel case. If both options and keywords are set, then the options will be used.

    Returns:
        :class:`~geopyspark.geotrellis.rdd.TiledRasterLayer`
    """

    options = options or kwargs or {}
    layer_zoom = layer_zoom or 0

    pysc = get_spark_context()

    _construct_catalog(pysc, uri, options)

    cached = _mapped_cached[uri]

    key = map_key_input(LayerType(layer_type).value, True)

    num_partitions = num_partitions or pysc.defaultMinPartitions

    if not query_geom:
        srdd = cached.reader.read(key, layer_name, layer_zoom, num_partitions)
        return TiledRasterLayer(layer_type, srdd)

    else:
        if time_intervals:
            time_intervals = [time.astimezone(pytz.utc).isoformat() for time in time_intervals]
        else:
            time_intervals = []

        query_proj = query_proj or ""

        if isinstance(query_proj, int):
            query_proj = str(query_proj)

        if isinstance(query_geom, (Polygon, MultiPolygon, Point)):
            srdd = cached.reader.query(key,
                                       layer_name,
                                       layer_zoom,
                                       shapely.wkb.dumps(query_geom),
                                       time_intervals,
                                       query_proj,
                                       num_partitions)

        elif isinstance(query_geom, Extent):
            srdd = cached.reader.query(key,
                                       layer_name,
                                       layer_zoom,
                                       shapely.wkb.dumps(query_geom.to_polygon),
                                       time_intervals,
                                       query_proj,
                                       num_partitions)

        elif isinstance(query_geom, bytes):
            srdd = cached.reader.query(key,
                                       layer_name,
                                       layer_zoom,
                                       query_geom,
                                       time_intervals,
                                       query_proj,
                                       num_partitions)
        else:
            raise TypeError("Could not query intersection", query_geom)

        return TiledRasterLayer(layer_type, srdd)
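A usage sketch for this older ``query`` signature, showing a space-time query. The catalog URI, layer name, and dates are placeholders, and a running SparkContext is assumed.

import datetime

import pytz
from geopyspark.geotrellis.constants import LayerType

# Both datetimes are timezone-aware, matching the astimezone(pytz.utc)
# conversion performed in the function body above.
start = datetime.datetime(2017, 1, 1, tzinfo=pytz.utc)
end = datetime.datetime(2017, 12, 31, tzinfo=pytz.utc)

layer = query(LayerType.SPACETIME,
              "hdfs:///catalog",            # hypothetical HDFS catalog
              "example-temporal-layer",     # hypothetical layer name
              layer_zoom=5,
              time_intervals=[start, end])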
Example #4
def get(layer_type,
        uri,
        crs=None,
        max_tile_size=None,
        num_partitions=None,
        chunk_size=None,
        time_tag=None,
        time_format=None,
        s3_client=None):
    """Creates a ``RasterLayer`` from GeoTiffs that are located on the local file system, ``HDFS``,
    or ``S3``.

    Args:
        layer_type (str or :class:`geopyspark.geotrellis.constants.LayerType`): What the spatial type
            of the geotiffs is. This is represented by either constants within ``LayerType`` or by
            a string.

            Note:
                All of the GeoTiffs must have the same spatial type.

        uri (str): The path to a given file/directory.
        crs (str, optional): The CRS that the output tiles should be
            in. The CRS must be in the well-known name format. If ``None``,
            then the CRS that the tiles were originally in will be used.
        max_tile_size (int, optional): The max size of each tile in the
            resulting Layer. If the size is smaller than a tile that is read in,
            then that tile will be broken into tiles of the specified
            size. If ``None``, then the whole tile will be read in.
        num_partitions (int, optional): The number of partitions Spark
            will make when the data is repartitioned. If ``None``, then the
            data will not be repartitioned.
        chunk_size (int, optional): How many bytes of the file should be
            read in at a time. If ``None``, then files will be read in 65536
            byte chunks.
        time_tag (str, optional): The name of the tiff tag that contains
            the time stamp for the tile. If ``None``, then the default value
            is: ``TIFFTAG_DATETIME``.
        time_format (str, optional): The pattern of the time stamp for
            java.time.format.DateTimeFormatter to parse. If ``None``,
            then the default value is: ``yyyy:MM:dd HH:mm:ss``.
        s3_client (str, optional): Which ``S3Client`` to use when reading
            GeoTiffs from S3. There are currently two options: ``default`` and
            ``mock``. If ``None``, ``default`` is used.

            Note:
                ``mock`` should only be used in unit tests and debugging.

    Returns:
        :class:`~geopyspark.geotrellis.rdd.RasterLayer`
    """

    inputs = {k: v for k, v in locals().items() if v is not None}
    pysc = get_spark_context()

    geotiff_rdd = pysc._gateway.jvm.geopyspark.geotrellis.io.geotiff.GeoTiffRDD

    key = map_key_input(LayerType(inputs.pop('layer_type')).value, False)

    if isinstance(uri, list):
        srdd = geotiff_rdd.get(pysc._jsc.sc(), key, inputs.pop('uri'), inputs)
    else:
        srdd = geotiff_rdd.get(pysc._jsc.sc(), key, [inputs.pop('uri')],
                               inputs)

    return RasterLayer(layer_type, srdd)
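A usage sketch for this version of ``get``, reading every GeoTiff under a hypothetical local directory. The string ``'spatial'`` is passed for the layer type, which ``LayerType()`` accepts as shown in the body above; a running SparkContext is assumed.

# Hypothetical directory of single-band, spatial GeoTiffs.
raster_layer = get('spatial',
                   "file:///tmp/geotiffs/",
                   max_tile_size=256,     # break large rasters into 256x256 tiles
                   chunk_size=131072)     # read files in 128 KiB chunks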
Example #5
def read_value(layer_type,
               uri,
               layer_name,
               layer_zoom,
               col,
               row,
               zdt=None,
               options=None,
               **kwargs):
    """Reads a single ``Tile`` from a GeoTrellis catalog.
    Unlike other functions in this module, this will not return a ``TiledRasterLayer``, but rather a
    GeoPySpark formatted raster. This is the function to use when creating a tile server.

    Note:
        When requesting a tile that does not exist, ``None`` will be returned.

    Args:
        layer_type (str or :class:`geopyspark.geotrellis.constants.LayerType`): What the spatial type
            of the geotiffs is. This is represented by either constants within ``LayerType`` or by
            a string.
        uri (str): The Uniform Resource Identifier used to point towards the desired GeoTrellis
            catalog to be read from. The shape of this string varies depending on backend.
        layer_name (str): The name of the GeoTrellis catalog to be read from.
        layer_zoom (int): The zoom level of the layer that is to be read.
        col (int): The col number of the tile within the layout. Cols run east to west.
        row (int): The row number of the tile within the layout. Rows run north to south.
        zdt (``datetime.datetime``, optional): The time stamp of the tile if the data is spatial-temporal.
            This is represented as a ``datetime.datetime`` instance. The default value is
            ``None``. If ``None``, then only the spatial area will be queried.
        options (dict, optional): Additional parameters for reading the tile for specific backends.
            The dictionary is only used for ``Cassandra`` and ``HBase``, no other backend requires
            this to be set.
        **kwargs: The optional parameters can also be set as keyword arguments. The keywords must
            be in camel case. If both options and keywords are set, then the options will be used.

    Returns:
        :class:`~geopyspark.geotrellis.Tile`
    """

    if not _in_bounds(layer_type, uri, layer_name, layer_zoom, col, row):
        return None
    else:
        options = options or kwargs or {}

        if zdt:
            zdt = zdt.astimezone(pytz.utc).isoformat()
        else:
            zdt = ''

        if uri not in _mapped_cached:
            _construct_catalog(get_spark_context(), uri, options)

        cached = _mapped_cached[uri]

        key = map_key_input(LayerType(layer_type).value, True)

        values = cached.value_reader.readTile(key,
                                              layer_name,
                                              layer_zoom,
                                              col,
                                              row,
                                              zdt)

        return multibandtile_decoder(values)
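A usage sketch for ``read_value``, fetching one tile from a hypothetical catalog. The catalog URI, layer name, and col/row values are placeholders, and the ``cells`` attribute on the returned ``Tile`` is an assumption about its fields.

from geopyspark.geotrellis.constants import LayerType

tile = read_value(LayerType.SPATIAL,
                  "file:///tmp/catalog",   # hypothetical catalog URI
                  "example-layer",         # hypothetical layer name
                  layer_zoom=11,
                  col=523,
                  row=760)

if tile is None:
    print("no tile at that col/row")       # out-of-bounds requests return None
else:
    print(tile.cells.shape)                # numpy array of band values (assumed field name)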