Пример #1
0
def haversine_distance(p1_lon, p1_lat, p2_lon, p2_lat):
    """ Compute the haversine distances between an arbitrary list of lon/lat
    pairs

    Parameters
    ----------
    p1_lon
        longitude of first set of coords
    p1_lat
        latitude of first set of coords
    p2_lon
        longitude of second set of coords
    p2_lat
        latitude of second set of coords

    Returns
    -------
    result : cudf.Series
        The distance between all pairs of lon/lat coordinates
    """

    p1_lon, p1_lat, p2_lon, p2_lat = normalize_point_columns(
        as_column(p1_lon),
        as_column(p1_lat),
        as_column(p2_lon),
        as_column(p2_lat),
    )
    return cpp_haversine_distance(p1_lon, p1_lat, p2_lon, p2_lat)
Пример #2
0
def polygon_bounding_boxes(poly_offsets, ring_offsets, xs, ys):
    """Compute the minimum bounding-boxes for a set of polygons.

    Parameters
    ----------
    poly_offsets
        Begin indices of the first ring in each polygon (i.e. prefix-sum)
    ring_offsets
        Begin indices of the first point in each ring (i.e. prefix-sum)
    xs
        Polygon point x-coordinates
    ys
        Polygon point y-coordinates

    Returns
    -------
    result : cudf.DataFrame
        minimum bounding boxes for each polygon

        x_min : cudf.Series
            the minimum x-coordinate of each bounding box
        y_min : cudf.Series
            the minimum y-coordinate of each bounding box
        x_max : cudf.Series
            the maximum x-coordinate of each bounding box
        y_max : cudf.Series
            the maximum y-coordinate of each bounding box
    """
    poly_offsets = as_column(poly_offsets, dtype="int32")
    ring_offsets = as_column(ring_offsets, dtype="int32")
    xs, ys = normalize_point_columns(as_column(xs), as_column(ys))
    return DataFrame._from_table(
        cpp_polygon_bounding_boxes(poly_offsets, ring_offsets, xs, ys)
    )
Пример #3
0
def polyline_bounding_boxes(poly_offsets, xs, ys, expansion_radius):
    """Compute the minimum bounding-boxes for a set of polylines.

    Parameters
    ----------
    poly_offsets
        Begin indices of the first ring in each polyline (i.e. prefix-sum)
    xs
        Polyline point x-coordinates
    ys
        Polyline point y-coordinates
    expansion_radius
        radius of each polyline point

    Returns
    -------
    result : cudf.DataFrame
        minimum bounding boxes for each polyline

        x_min : cudf.Series
            the minimum x-coordinate of each bounding box
        y_min : cudf.Series
            the minimum y-coordinate of each bounding box
        x_max : cudf.Series
            the maximum x-coordinate of each bounding box
        y_max : cudf.Series
            the maximum y-coordinate of each bounding box
    """
    poly_offsets = as_column(poly_offsets, dtype="int32")
    xs, ys = normalize_point_columns(as_column(xs), as_column(ys))
    return DataFrame._from_data(
        *cpp_polyline_bounding_boxes(poly_offsets, xs, ys, expansion_radius)
    )
Пример #4
0
def trajectory_distances_and_speeds(
    num_trajectories, object_ids, xs, ys, timestamps
):
    """
    Compute the distance traveled and speed of sets of trajectories

    Parameters
    ----------
    num_trajectories
        number of trajectories (unique object ids)
    object_ids
        column of object (e.g., vehicle) ids
    xs
        column of x-coordinates (in kilometers)
    ys
        column of y-coordinates (in kilometers)
    timestamps
        column of timestamps in any resolution

    Returns
    -------
    result : cudf.DataFrame
        meters : cudf.Series
            trajectory distance (in kilometers)
        speed  : cudf.Series
            trajectory speed (in meters/second)

    Examples
    --------
    Compute the distances and speeds of derived trajectories

    >>> objects, traj_offsets = cuspatial.derive_trajectories(...)
    >>> dists_and_speeds = cuspatial.trajectory_distances_and_speeds(
            len(traj_offsets)
            objects['object_id'],
            objects['x'],
            objects['y'],
            objects['timestamp']
        )
    >>> print(dists_and_speeds)
                       distance          speed
        trajectory_id
        0                1000.0  100000.000000
        1                1000.0  111111.109375
    """

    object_ids = as_column(object_ids, dtype=np.int32)
    xs, ys = normalize_point_columns(as_column(xs), as_column(ys))
    timestamps = normalize_timestamp_column(as_column(timestamps))
    df = DataFrame._from_table(
        cpp_trajectory_distances_and_speeds(
            num_trajectories, object_ids, xs, ys, timestamps
        )
    )
    df.index.name = "trajectory_id"
    return df
Пример #5
0
def trajectory_bounding_boxes(num_trajectories, object_ids, xs, ys):
    """ Compute the bounding boxes of sets of trajectories.

    Parameters
    ----------
    num_trajectories
        number of trajectories (unique object ids)
    object_ids
        column of object (e.g., vehicle) ids
    xs
        column of x-coordinates (in kilometers)
    ys
        column of y-coordinates (in kilometers)

    Returns
    -------
    result : cudf.DataFrame
        minimum bounding boxes (in kilometers) for each trajectory

        x_min : cudf.Series
            the minimum x-coordinate of each bounding box
        y_min : cudf.Series
            the minimum y-coordinate of each bounding box
        x_max : cudf.Series
            the maximum x-coordinate of each bounding box
        y_max : cudf.Series
            the maximum y-coordinate of each bounding box

    Examples
    --------
    Compute the minimum bounding boxes of derived trajectories

    >>> objects, traj_offsets = trajectory.derive_trajectories(
            [0, 0, 1, 1],  # object_id
            [0, 1, 2, 3],  # x
            [0, 0, 1, 1],  # y
            [0, 10, 0, 10] # timestamp
        )
    >>> traj_bounding_boxes = cuspatial.trajectory_bounding_boxes(
            len(traj_offsets),
            objects['object_id'],
            objects['x'],
            objects['y']
        )
    >>> print(traj_bounding_boxes)
        x_min   y_min   x_max   y_max
    0     0.0     0.0     2.0     2.0
    1     1.0     1.0     3.0     3.0
    """

    object_ids = as_column(object_ids, dtype=np.int32)
    xs, ys = normalize_point_columns(as_column(xs), as_column(ys))
    return DataFrame._from_table(
        cpp_trajectory_bounding_boxes(num_trajectories, object_ids, xs, ys)
    )
Пример #6
0
def derive_trajectories(object_ids, xs, ys, timestamps):
    """
    Derive trajectories from object ids, points, and timestamps.

    Parameters
    ----------
    object_ids
        column of object (e.g., vehicle) ids
    xs
        column of x-coordinates (in kilometers)
    ys
        column of y-coordinates (in kilometers)
    timestamps
        column of timestamps in any resolution

    Returns
    -------
    result : tuple (objects, traj_offsets)
        objects : cudf.DataFrame
            object_ids, xs, ys, and timestamps sorted by
            ``(object_id, timestamp)``, used by ``trajectory_bounding_boxes``
            and ``trajectory_distances_and_speeds``
        traj_offsets : cudf.Series
            offsets of discovered trajectories

    Examples
    --------
    Compute sorted objects and discovered trajectories

    >>> objects, traj_offsets = cuspatial.derive_trajectories(
            [0, 1, 2, 3],  # object_id
            [0, 0, 1, 1],  # x
            [0, 0, 1, 1],  # y
            [0, 10, 0, 10] # timestamp
        )
    >>> print(traj_offsets)
        0  0
        1  2
    >>> print(objects)
           object_id       x       y  timestamp
        0          0       1       0          0
        1          0       0       0         10
        2          1       3       1          0
        3          1       2       1         10
    """

    object_ids = as_column(object_ids, dtype=np.int32)
    xs, ys = normalize_point_columns(as_column(xs), as_column(ys))
    timestamps = normalize_timestamp_column(as_column(timestamps))
    objects, traj_offsets = cpp_derive_trajectories(
        object_ids, xs, ys, timestamps
    )
    return DataFrame._from_table(objects), Series(data=traj_offsets)
Пример #7
0
def points_in_spatial_window(min_x, max_x, min_y, max_y, xs, ys):
    """ Return only the subset of coordinates that fall within a
    rectangular window.

    A point `(x, y)` is inside the query window if and only if
    ``min_x < x < max_x AND min_y < y < max_y``

    The window is specified by minimum and maximum x and y
    coordinates.

    Parameters
    ----------
    min_x
        lower x-coordinate of the query window
    max_x
        upper x-coordinate of the query window
    min_y
        lower y-coordinate of the query window
    max_y
        upper y-coordinate of the query window
    xs
        column of x-coordinates that may fall within the window
    ys
        column of y-coordinates that may fall within the window

    Returns
    -------
    result : cudf.DataFrame
        subset of `(x, y)` pairs above that fall within the window

    Notes
    -----
    * Swaps ``min_x`` and ``max_x`` if ``min_x > max_x``
    * Swaps ``min_y`` and ``max_y`` if ``min_y > max_y``
    """
    xs, ys = normalize_point_columns(as_column(xs), as_column(ys))
    return DataFrame._from_data(
        *spatial_window.points_in_spatial_window(
            min_x, max_x, min_y, max_y, xs, ys
        )
    )
Пример #8
0
def directed_hausdorff_distance(xs, ys, space_offsets):
    """Compute the directed Hausdorff distances between all pairs of
    spaces.

    Parameters
    ----------
    xs
        column of x-coordinates
    ys
        column of y-coordinates
    space_offsets
        beginning index of each space, plus the last space's end offset.

    Returns
    -------
    result : cudf.DataFrame
        The pairwise directed distance matrix with one row and one
        column per input space; the value at row i, column j represents the
        hausdorff distance from space i to space j.

    Examples
    --------
    The directed Hausdorff distance from one space to another is the greatest
    of all the distances between any point in the first space to the closest
    point in the second.

    `Wikipedia <https://en.wikipedia.org/wiki/Hausdorff_distance>`_

    Consider a pair of lines on a grid::

             :
             x
        -----xyy---
             :
             :

    x\\ :sub:`0` = (0, 0), x\\ :sub:`1` = (0, 1)

    y\\ :sub:`0` = (1, 0), y\\ :sub:`1` = (2, 0)

    x\\ :sub:`0` is the closest point in ``x`` to ``y``. The distance from
    x\\ :sub:`0` to the farthest point in ``y`` is 2.

    y\\ :sub:`0` is the closest point in ``y`` to ``x``. The distance from
    y\\ :sub:`0` to the farthest point in ``x`` is 1.414.

    Compute the directed hausdorff distances between a set of spaces

    >>> result = cuspatial.directed_hausdorff_distance(
            [0, 1, 0, 0], # xs
            [0, 0, 1, 2], # ys
            [0, 2, 4],    # space_offsets
        )
    >>> print(result)
             0         1
        0  0.0  1.414214
        1  2.0  0.000000
    """

    num_spaces = len(space_offsets)
    if num_spaces == 0:
        return DataFrame()
    xs, ys = normalize_point_columns(as_column(xs), as_column(ys))
    result = cpp_directed_hausdorff_distance(
        xs, ys, as_column(space_offsets, dtype="uint32"),
    )
    result = result.data_array_view
    result = result.reshape(num_spaces, num_spaces)
    return DataFrame(result)
Пример #9
0
def point_in_polygon(
    test_points_x,
    test_points_y,
    poly_offsets,
    poly_ring_offsets,
    poly_points_x,
    poly_points_y,
):
    """ Compute from a set of points and a set of polygons which points fall
    within which polygons. Note that `polygons_(x,y)` must be specified as
    closed polygons: the first and last coordinate of each polygon must be
    the same.

    Parameters
    ----------
    test_points_x
        x-coordinate of test points
    test_points_y
        y-coordinate of test points
    poly_offsets
        beginning index of the first ring in each polygon
    poly_ring_offsets
        beginning index of the first point in each ring
    poly_points_x
        x closed-coordinate of polygon points
    poly_points_y
        y closed-coordinate of polygon points

    Examples
    --------

    Test whether 3 points fall within either of two polygons

    >>> result = cuspatial.point_in_polygon(
        [0, -8, 6.0],                             # test_points_x
        [0, -8, 6.0],                             # test_points_y
        cudf.Series([0, 1], index=['nyc', 'hudson river']), # poly_offsets
        [0, 3],                                   # ring_offsets
        [-10, 5, 5, -10, 0, 10, 10, 0],           # poly_points_x
        [-10, -10, 5, 5, 0, 0, 10, 10],           # poly_points_y
    )
    # The result of point_in_polygon is a DataFrame of Boolean
    # values indicating whether each point (rows) falls within
    # each polygon (columns).
    >>> print(result)
                nyc            hudson river
    0          True          True
    1          True         False
    2         False          True
    # Point 0: (0, 0) falls in both polygons
    # Point 1: (-8, -8) falls in the first polygon
    # Point 2: (6.0, 6.0) falls in the second polygon

    note
    input Series x and y will not be index aligned, but computed as
    sequential arrays.

    Returns
    -------
    result : cudf.DataFrame
        A DataFrame of boolean values indicating whether each point falls
        within each polygon.
    """

    if len(poly_offsets) == 0:
        return DataFrame()

    (
        test_points_x,
        test_points_y,
        poly_points_x,
        poly_points_y,
    ) = normalize_point_columns(
        as_column(test_points_x),
        as_column(test_points_y),
        as_column(poly_points_x),
        as_column(poly_points_y),
    )

    result = cpp_point_in_polygon(
        test_points_x,
        test_points_y,
        as_column(poly_offsets, dtype="int32"),
        as_column(poly_ring_offsets, dtype="int32"),
        poly_points_x,
        poly_points_y,
    )

    result = gis_utils.pip_bitmap_column_to_binary_array(
        polygon_bitmap_column=result, width=len(poly_offsets)
    )
    result = DataFrame(result)
    result = DataFrame._from_data(
        {name: col.astype("bool") for name, col in result._data.items()}
    )
    result.columns = [x for x in list(reversed(poly_offsets.index))]
    result = result[list(reversed(result.columns))]
    return result
Пример #10
0
def quadtree_point_in_polygon(
    poly_quad_pairs,
    quadtree,
    point_indices,
    points_x,
    points_y,
    poly_offsets,
    ring_offsets,
    poly_points_x,
    poly_points_y,
):
    """ Test whether the specified points are inside any of the specified
    polygons.

    Uses the table of (polygon, quadrant) pairs returned by
    ``cuspatial.join_quadtree_and_bounding_boxes`` to ensure only the points
    in the same quadrant as each polygon are tested for intersection.

    This pre-filtering can dramatically reduce number of points tested per
    polygon, enabling faster intersection-testing at the expense of extra
    memory allocated to store the quadtree and sorted point_indices.

    Parameters
    ----------
    poly_quad_pairs: cudf.DataFrame
        Table of (polygon, quadrant) index pairs returned by
        ``cuspatial.join_quadtree_and_bounding_boxes``.
    quadtree : cudf.DataFrame
        A complete quadtree for a given area-of-interest bounding box.
    point_indices : cudf.Series
        Sorted point indices returned by ``cuspatial.quadtree_on_points``
    points_x : cudf.Series
        x-coordinates of points used to construct the quadtree.
    points_y : cudf.Series
        y-coordinates of points used to construct the quadtree.
    poly_offsets : cudf.Series
        Begin index of the first ring in each polygon.
    ring_offsets : cudf.Series
        Begin index of the first point in each ring.
    poly_points_x : cudf.Series
        Polygon point x-coodinates.
    poly_points_y : cudf.Series
        Polygon point y-coodinates.

    Returns
    -------
    result : cudf.DataFrame
        Indices for each intersecting point and polygon pair.

        point_offset : cudf.Series
            Indices of each point that intersects with a polygon.
        polygon_offset : cudf.Series
            Indices of each polygon with which a point intersected.
    """

    (
        points_x,
        points_y,
        poly_points_x,
        poly_points_y,
    ) = normalize_point_columns(
        as_column(points_x),
        as_column(points_y),
        as_column(poly_points_x),
        as_column(poly_points_y),
    )
    return DataFrame._from_table(
        spatial_join.quadtree_point_in_polygon(
            poly_quad_pairs,
            quadtree,
            as_column(point_indices, dtype="uint32"),
            points_x,
            points_y,
            as_column(poly_offsets, dtype="uint32"),
            as_column(ring_offsets, dtype="uint32"),
            poly_points_x,
            poly_points_y,
        ))
Пример #11
0
def quadtree_point_to_nearest_polyline(
    poly_quad_pairs,
    quadtree,
    point_indices,
    points_x,
    points_y,
    poly_offsets,
    poly_points_x,
    poly_points_y,
):
    """ Finds the nearest polyline to each point in a quadrant, and computes
    the distances between each point and polyline.

    Uses the table of (polyline, quadrant) pairs returned by
    ``cuspatial.join_quadtree_and_bounding_boxes`` to ensure distances are
    computed only for the points in the same quadrant as each polyline.

    Parameters
    ----------
    poly_quad_pairs: cudf.DataFrame
        Table of (polyline, quadrant) index pairs returned by
        ``cuspatial.join_quadtree_and_bounding_boxes``.
    quadtree : cudf.DataFrame
        A complete quadtree for a given area-of-interest bounding box.
    point_indices : cudf.Series
        Sorted point indices returned by ``cuspatial.quadtree_on_points``
    points_x : cudf.Series
        x-coordinates of points used to construct the quadtree.
    points_y : cudf.Series
        y-coordinates of points used to construct the quadtree.
    poly_offsets : cudf.Series
        Begin index of the first point in each polyline.
    poly_points_x : cudf.Series
        Polyline point x-coodinates.
    poly_points_y : cudf.Series
        Polyline point y-coodinates.

    Returns
    -------
    result : cudf.DataFrame
        Indices for each point and its nearest polyline, and the distance
        between the two.

        point_offset : cudf.Series
            Indices of each point that intersects with a polyline.
        polyline_offset : cudf.Series
            Indices of each polyline with which a point intersected.
        distance : cudf.Series
            Distances between each point and its nearest polyline.
    """
    (
        points_x,
        points_y,
        poly_points_x,
        poly_points_y,
    ) = normalize_point_columns(
        as_column(points_x),
        as_column(points_y),
        as_column(poly_points_x),
        as_column(poly_points_y),
    )
    return DataFrame._from_table(
        spatial_join.quadtree_point_to_nearest_polyline(
            poly_quad_pairs,
            quadtree,
            as_column(point_indices, dtype="uint32"),
            points_x,
            points_y,
            as_column(poly_offsets, dtype="uint32"),
            poly_points_x,
            poly_points_y,
        ))
Пример #12
0
def quadtree_on_points(xs, ys, x_min, x_max, y_min, y_max, scale, max_depth,
                       min_size):
    """ Construct a quadtree from a set of points for a given area-of-interest
        bounding box.

    Parameters
    ----------
    xs
        Column of x-coordinates for each point
    ys
        Column of y-coordinates for each point
    x_min
        The lower-left x-coordinate of the area of interest bounding box
    x_max
        The upper-right x-coordinate of the area of interest bounding box
    y_min
        The lower-left y-coordinate of the area of interest bounding box
    y_max
        The upper-right y-coordinate of the area of interest bounding box
    scale
        Scale to apply to each point's distance from ``(x_min, y_min)``
    max_depth
        Maximum quadtree depth
    min_size
        Minimum number of points for a non-leaf quadtree node

    Returns
    -------
    result : tuple (cudf.Series, cudf.DataFrame)
        keys_to_points  : cudf.Series(dtype=np.int32)
            A column of sorted keys to original point indices
        quadtree        : cudf.DataFrame
            A complete quadtree for the set of input points

            key         : cudf.Series(dtype=np.int32)
                An int32 column of quadrant keys
            level       : cudf.Series(dtype=np.int8)
                An int8 column of quadtree levels
            is_quad     : cudf.Series(dtype=np.bool_)
                A boolean column indicating whether the node is a quad or leaf
            length      : cudf.Series(dtype=np.int32)
                If this is a non-leaf quadrant (i.e. ``is_quad`` is ``True``),
                this column's value is the number of children in the non-leaf
                quadrant.

                Otherwise this column's value is the number of points
                contained in the leaf quadrant.
            offset      : cudf.Series(dtype=np.int32)
                If this is a non-leaf quadrant (i.e. ``is_quad`` is ``True``),
                this column's value is the position of the non-leaf quadrant's
                first child.

                Otherwise this column's value is the position of the leaf
                quadrant's first point.

    Notes
    -----
    * Swaps ``min_x`` and ``max_x`` if ``min_x > max_x``
    * Swaps ``min_y`` and ``max_y`` if ``min_y > max_y``

    Examples
    --------

    An example of selecting the ``min_size`` and ``scale`` based on input::

        >>> np.random.seed(0)
        >>> points = cudf.DataFrame({
                "x": cudf.Series(np.random.normal(size=120)) * 500,
                "y": cudf.Series(np.random.normal(size=120)) * 500,
            })

        >>> max_depth = 3
        >>> min_size = 50
        >>> min_x, min_y, max_x, max_y = (points["x"].min(),
                                          points["y"].min(),
                                          points["x"].max(),
                                          points["y"].max())
        >>> scale = max(max_x - min_x, max_y - min_y) // (1 << max_depth)
        >>> print(
                "min_size:   " + str(min_size) + "\\n"
                "num_points: " + str(len(points)) + "\\n"
                "min_x:      " + str(min_x) + "\\n"
                "max_x:      " + str(max_x) + "\\n"
                "min_y:      " + str(min_y) + "\\n"
                "max_y:      " + str(max_y) + "\\n"
                "scale:      " + str(scale) + "\\n"
            )
        min_size:   50
        num_points: 120
        min_x:      -1577.4949079170394
        max_x:      1435.877311993804
        min_y:      -1412.7015761122134
        max_y:      1492.572387431971
        scale:      301.0

        >>> key_to_point, quadtree = cuspatial.quadtree_on_points(
                points["x"],
                points["y"],
                min_x,
                max_x,
                min_y,
                max_y,
                scale, max_depth, min_size
            )

        >>> print(quadtree)
            key  level  is_quad  length  offset
        0     0      0    False      15       0
        1     1      0    False      27      15
        2     2      0    False      12      42
        3     3      0     True       4       8
        4     4      0    False       5     106
        5     6      0    False       6     111
        6     9      0    False       2     117
        7    12      0    False       1     119
        8    12      1    False      22      54
        9    13      1    False      18      76
        10   14      1    False       9      94
        11   15      1    False       3     103

        >>> print(key_to_point)
        0       63
        1       20
        2       33
        3       66
        4       19
            ...
        115    113
        116      3
        117     78
        118     98
        119     24
        Length: 120, dtype: int32
    """

    xs, ys = normalize_point_columns(as_column(xs), as_column(ys))
    x_min, x_max, y_min, y_max = (
        min(x_min, x_max),
        max(x_min, x_max),
        min(y_min, y_max),
        max(y_min, y_max),
    )

    min_scale = max(x_max - x_min, y_max - y_min) / ((1 << max_depth) + 2)
    if scale < min_scale:
        warnings.warn("scale {} is less than required minimum ".format(scale) +
                      "scale {}. Clamping to minimum scale".format(min_scale))

    key_to_point, quadtree = cpp_quadtree_on_points(
        xs,
        ys,
        x_min,
        x_max,
        y_min,
        y_max,
        max(scale, min_scale),
        max_depth,
        min_size,
    )
    return Series(key_to_point), DataFrame._from_table(quadtree)