def haversine_distance(p1_lon, p1_lat, p2_lon, p2_lat): """ Compute the haversine distances between an arbitrary list of lon/lat pairs Parameters ---------- p1_lon longitude of first set of coords p1_lat latitude of first set of coords p2_lon longitude of second set of coords p2_lat latitude of second set of coords Returns ------- result : cudf.Series The distance between all pairs of lon/lat coordinates """ p1_lon, p1_lat, p2_lon, p2_lat = normalize_point_columns( as_column(p1_lon), as_column(p1_lat), as_column(p2_lon), as_column(p2_lat), ) return cpp_haversine_distance(p1_lon, p1_lat, p2_lon, p2_lat)
def polygon_bounding_boxes(poly_offsets, ring_offsets, xs, ys): """Compute the minimum bounding-boxes for a set of polygons. Parameters ---------- poly_offsets Begin indices of the first ring in each polygon (i.e. prefix-sum) ring_offsets Begin indices of the first point in each ring (i.e. prefix-sum) xs Polygon point x-coordinates ys Polygon point y-coordinates Returns ------- result : cudf.DataFrame minimum bounding boxes for each polygon x_min : cudf.Series the minimum x-coordinate of each bounding box y_min : cudf.Series the minimum y-coordinate of each bounding box x_max : cudf.Series the maximum x-coordinate of each bounding box y_max : cudf.Series the maximum y-coordinate of each bounding box """ poly_offsets = as_column(poly_offsets, dtype="int32") ring_offsets = as_column(ring_offsets, dtype="int32") xs, ys = normalize_point_columns(as_column(xs), as_column(ys)) return DataFrame._from_table( cpp_polygon_bounding_boxes(poly_offsets, ring_offsets, xs, ys) )
def polyline_bounding_boxes(poly_offsets, xs, ys, expansion_radius): """Compute the minimum bounding-boxes for a set of polylines. Parameters ---------- poly_offsets Begin indices of the first ring in each polyline (i.e. prefix-sum) xs Polyline point x-coordinates ys Polyline point y-coordinates expansion_radius radius of each polyline point Returns ------- result : cudf.DataFrame minimum bounding boxes for each polyline x_min : cudf.Series the minimum x-coordinate of each bounding box y_min : cudf.Series the minimum y-coordinate of each bounding box x_max : cudf.Series the maximum x-coordinate of each bounding box y_max : cudf.Series the maximum y-coordinate of each bounding box """ poly_offsets = as_column(poly_offsets, dtype="int32") xs, ys = normalize_point_columns(as_column(xs), as_column(ys)) return DataFrame._from_data( *cpp_polyline_bounding_boxes(poly_offsets, xs, ys, expansion_radius) )
def trajectory_distances_and_speeds( num_trajectories, object_ids, xs, ys, timestamps ): """ Compute the distance traveled and speed of sets of trajectories Parameters ---------- num_trajectories number of trajectories (unique object ids) object_ids column of object (e.g., vehicle) ids xs column of x-coordinates (in kilometers) ys column of y-coordinates (in kilometers) timestamps column of timestamps in any resolution Returns ------- result : cudf.DataFrame meters : cudf.Series trajectory distance (in kilometers) speed : cudf.Series trajectory speed (in meters/second) Examples -------- Compute the distances and speeds of derived trajectories >>> objects, traj_offsets = cuspatial.derive_trajectories(...) >>> dists_and_speeds = cuspatial.trajectory_distances_and_speeds( len(traj_offsets) objects['object_id'], objects['x'], objects['y'], objects['timestamp'] ) >>> print(dists_and_speeds) distance speed trajectory_id 0 1000.0 100000.000000 1 1000.0 111111.109375 """ object_ids = as_column(object_ids, dtype=np.int32) xs, ys = normalize_point_columns(as_column(xs), as_column(ys)) timestamps = normalize_timestamp_column(as_column(timestamps)) df = DataFrame._from_table( cpp_trajectory_distances_and_speeds( num_trajectories, object_ids, xs, ys, timestamps ) ) df.index.name = "trajectory_id" return df
def trajectory_bounding_boxes(num_trajectories, object_ids, xs, ys): """ Compute the bounding boxes of sets of trajectories. Parameters ---------- num_trajectories number of trajectories (unique object ids) object_ids column of object (e.g., vehicle) ids xs column of x-coordinates (in kilometers) ys column of y-coordinates (in kilometers) Returns ------- result : cudf.DataFrame minimum bounding boxes (in kilometers) for each trajectory x_min : cudf.Series the minimum x-coordinate of each bounding box y_min : cudf.Series the minimum y-coordinate of each bounding box x_max : cudf.Series the maximum x-coordinate of each bounding box y_max : cudf.Series the maximum y-coordinate of each bounding box Examples -------- Compute the minimum bounding boxes of derived trajectories >>> objects, traj_offsets = trajectory.derive_trajectories( [0, 0, 1, 1], # object_id [0, 1, 2, 3], # x [0, 0, 1, 1], # y [0, 10, 0, 10] # timestamp ) >>> traj_bounding_boxes = cuspatial.trajectory_bounding_boxes( len(traj_offsets), objects['object_id'], objects['x'], objects['y'] ) >>> print(traj_bounding_boxes) x_min y_min x_max y_max 0 0.0 0.0 2.0 2.0 1 1.0 1.0 3.0 3.0 """ object_ids = as_column(object_ids, dtype=np.int32) xs, ys = normalize_point_columns(as_column(xs), as_column(ys)) return DataFrame._from_table( cpp_trajectory_bounding_boxes(num_trajectories, object_ids, xs, ys) )
def derive_trajectories(object_ids, xs, ys, timestamps): """ Derive trajectories from object ids, points, and timestamps. Parameters ---------- object_ids column of object (e.g., vehicle) ids xs column of x-coordinates (in kilometers) ys column of y-coordinates (in kilometers) timestamps column of timestamps in any resolution Returns ------- result : tuple (objects, traj_offsets) objects : cudf.DataFrame object_ids, xs, ys, and timestamps sorted by ``(object_id, timestamp)``, used by ``trajectory_bounding_boxes`` and ``trajectory_distances_and_speeds`` traj_offsets : cudf.Series offsets of discovered trajectories Examples -------- Compute sorted objects and discovered trajectories >>> objects, traj_offsets = cuspatial.derive_trajectories( [0, 1, 2, 3], # object_id [0, 0, 1, 1], # x [0, 0, 1, 1], # y [0, 10, 0, 10] # timestamp ) >>> print(traj_offsets) 0 0 1 2 >>> print(objects) object_id x y timestamp 0 0 1 0 0 1 0 0 0 10 2 1 3 1 0 3 1 2 1 10 """ object_ids = as_column(object_ids, dtype=np.int32) xs, ys = normalize_point_columns(as_column(xs), as_column(ys)) timestamps = normalize_timestamp_column(as_column(timestamps)) objects, traj_offsets = cpp_derive_trajectories( object_ids, xs, ys, timestamps ) return DataFrame._from_table(objects), Series(data=traj_offsets)
def points_in_spatial_window(min_x, max_x, min_y, max_y, xs, ys): """ Return only the subset of coordinates that fall within a rectangular window. A point `(x, y)` is inside the query window if and only if ``min_x < x < max_x AND min_y < y < max_y`` The window is specified by minimum and maximum x and y coordinates. Parameters ---------- min_x lower x-coordinate of the query window max_x upper x-coordinate of the query window min_y lower y-coordinate of the query window max_y upper y-coordinate of the query window xs column of x-coordinates that may fall within the window ys column of y-coordinates that may fall within the window Returns ------- result : cudf.DataFrame subset of `(x, y)` pairs above that fall within the window Notes ----- * Swaps ``min_x`` and ``max_x`` if ``min_x > max_x`` * Swaps ``min_y`` and ``max_y`` if ``min_y > max_y`` """ xs, ys = normalize_point_columns(as_column(xs), as_column(ys)) return DataFrame._from_data( *spatial_window.points_in_spatial_window( min_x, max_x, min_y, max_y, xs, ys ) )
def directed_hausdorff_distance(xs, ys, space_offsets): """Compute the directed Hausdorff distances between all pairs of spaces. Parameters ---------- xs column of x-coordinates ys column of y-coordinates space_offsets beginning index of each space, plus the last space's end offset. Returns ------- result : cudf.DataFrame The pairwise directed distance matrix with one row and one column per input space; the value at row i, column j represents the hausdorff distance from space i to space j. Examples -------- The directed Hausdorff distance from one space to another is the greatest of all the distances between any point in the first space to the closest point in the second. `Wikipedia <https://en.wikipedia.org/wiki/Hausdorff_distance>`_ Consider a pair of lines on a grid:: : x -----xyy--- : : x\\ :sub:`0` = (0, 0), x\\ :sub:`1` = (0, 1) y\\ :sub:`0` = (1, 0), y\\ :sub:`1` = (2, 0) x\\ :sub:`0` is the closest point in ``x`` to ``y``. The distance from x\\ :sub:`0` to the farthest point in ``y`` is 2. y\\ :sub:`0` is the closest point in ``y`` to ``x``. The distance from y\\ :sub:`0` to the farthest point in ``x`` is 1.414. Compute the directed hausdorff distances between a set of spaces >>> result = cuspatial.directed_hausdorff_distance( [0, 1, 0, 0], # xs [0, 0, 1, 2], # ys [0, 2, 4], # space_offsets ) >>> print(result) 0 1 0 0.0 1.414214 1 2.0 0.000000 """ num_spaces = len(space_offsets) if num_spaces == 0: return DataFrame() xs, ys = normalize_point_columns(as_column(xs), as_column(ys)) result = cpp_directed_hausdorff_distance( xs, ys, as_column(space_offsets, dtype="uint32"), ) result = result.data_array_view result = result.reshape(num_spaces, num_spaces) return DataFrame(result)
def point_in_polygon( test_points_x, test_points_y, poly_offsets, poly_ring_offsets, poly_points_x, poly_points_y, ): """ Compute from a set of points and a set of polygons which points fall within which polygons. Note that `polygons_(x,y)` must be specified as closed polygons: the first and last coordinate of each polygon must be the same. Parameters ---------- test_points_x x-coordinate of test points test_points_y y-coordinate of test points poly_offsets beginning index of the first ring in each polygon poly_ring_offsets beginning index of the first point in each ring poly_points_x x closed-coordinate of polygon points poly_points_y y closed-coordinate of polygon points Examples -------- Test whether 3 points fall within either of two polygons >>> result = cuspatial.point_in_polygon( [0, -8, 6.0], # test_points_x [0, -8, 6.0], # test_points_y cudf.Series([0, 1], index=['nyc', 'hudson river']), # poly_offsets [0, 3], # ring_offsets [-10, 5, 5, -10, 0, 10, 10, 0], # poly_points_x [-10, -10, 5, 5, 0, 0, 10, 10], # poly_points_y ) # The result of point_in_polygon is a DataFrame of Boolean # values indicating whether each point (rows) falls within # each polygon (columns). >>> print(result) nyc hudson river 0 True True 1 True False 2 False True # Point 0: (0, 0) falls in both polygons # Point 1: (-8, -8) falls in the first polygon # Point 2: (6.0, 6.0) falls in the second polygon note input Series x and y will not be index aligned, but computed as sequential arrays. Returns ------- result : cudf.DataFrame A DataFrame of boolean values indicating whether each point falls within each polygon. """ if len(poly_offsets) == 0: return DataFrame() ( test_points_x, test_points_y, poly_points_x, poly_points_y, ) = normalize_point_columns( as_column(test_points_x), as_column(test_points_y), as_column(poly_points_x), as_column(poly_points_y), ) result = cpp_point_in_polygon( test_points_x, test_points_y, as_column(poly_offsets, dtype="int32"), as_column(poly_ring_offsets, dtype="int32"), poly_points_x, poly_points_y, ) result = gis_utils.pip_bitmap_column_to_binary_array( polygon_bitmap_column=result, width=len(poly_offsets) ) result = DataFrame(result) result = DataFrame._from_data( {name: col.astype("bool") for name, col in result._data.items()} ) result.columns = [x for x in list(reversed(poly_offsets.index))] result = result[list(reversed(result.columns))] return result
def quadtree_point_in_polygon( poly_quad_pairs, quadtree, point_indices, points_x, points_y, poly_offsets, ring_offsets, poly_points_x, poly_points_y, ): """ Test whether the specified points are inside any of the specified polygons. Uses the table of (polygon, quadrant) pairs returned by ``cuspatial.join_quadtree_and_bounding_boxes`` to ensure only the points in the same quadrant as each polygon are tested for intersection. This pre-filtering can dramatically reduce number of points tested per polygon, enabling faster intersection-testing at the expense of extra memory allocated to store the quadtree and sorted point_indices. Parameters ---------- poly_quad_pairs: cudf.DataFrame Table of (polygon, quadrant) index pairs returned by ``cuspatial.join_quadtree_and_bounding_boxes``. quadtree : cudf.DataFrame A complete quadtree for a given area-of-interest bounding box. point_indices : cudf.Series Sorted point indices returned by ``cuspatial.quadtree_on_points`` points_x : cudf.Series x-coordinates of points used to construct the quadtree. points_y : cudf.Series y-coordinates of points used to construct the quadtree. poly_offsets : cudf.Series Begin index of the first ring in each polygon. ring_offsets : cudf.Series Begin index of the first point in each ring. poly_points_x : cudf.Series Polygon point x-coodinates. poly_points_y : cudf.Series Polygon point y-coodinates. Returns ------- result : cudf.DataFrame Indices for each intersecting point and polygon pair. point_offset : cudf.Series Indices of each point that intersects with a polygon. polygon_offset : cudf.Series Indices of each polygon with which a point intersected. """ ( points_x, points_y, poly_points_x, poly_points_y, ) = normalize_point_columns( as_column(points_x), as_column(points_y), as_column(poly_points_x), as_column(poly_points_y), ) return DataFrame._from_table( spatial_join.quadtree_point_in_polygon( poly_quad_pairs, quadtree, as_column(point_indices, dtype="uint32"), points_x, points_y, as_column(poly_offsets, dtype="uint32"), as_column(ring_offsets, dtype="uint32"), poly_points_x, poly_points_y, ))
def quadtree_point_to_nearest_polyline( poly_quad_pairs, quadtree, point_indices, points_x, points_y, poly_offsets, poly_points_x, poly_points_y, ): """ Finds the nearest polyline to each point in a quadrant, and computes the distances between each point and polyline. Uses the table of (polyline, quadrant) pairs returned by ``cuspatial.join_quadtree_and_bounding_boxes`` to ensure distances are computed only for the points in the same quadrant as each polyline. Parameters ---------- poly_quad_pairs: cudf.DataFrame Table of (polyline, quadrant) index pairs returned by ``cuspatial.join_quadtree_and_bounding_boxes``. quadtree : cudf.DataFrame A complete quadtree for a given area-of-interest bounding box. point_indices : cudf.Series Sorted point indices returned by ``cuspatial.quadtree_on_points`` points_x : cudf.Series x-coordinates of points used to construct the quadtree. points_y : cudf.Series y-coordinates of points used to construct the quadtree. poly_offsets : cudf.Series Begin index of the first point in each polyline. poly_points_x : cudf.Series Polyline point x-coodinates. poly_points_y : cudf.Series Polyline point y-coodinates. Returns ------- result : cudf.DataFrame Indices for each point and its nearest polyline, and the distance between the two. point_offset : cudf.Series Indices of each point that intersects with a polyline. polyline_offset : cudf.Series Indices of each polyline with which a point intersected. distance : cudf.Series Distances between each point and its nearest polyline. """ ( points_x, points_y, poly_points_x, poly_points_y, ) = normalize_point_columns( as_column(points_x), as_column(points_y), as_column(poly_points_x), as_column(poly_points_y), ) return DataFrame._from_table( spatial_join.quadtree_point_to_nearest_polyline( poly_quad_pairs, quadtree, as_column(point_indices, dtype="uint32"), points_x, points_y, as_column(poly_offsets, dtype="uint32"), poly_points_x, poly_points_y, ))
def quadtree_on_points(xs, ys, x_min, x_max, y_min, y_max, scale, max_depth, min_size): """ Construct a quadtree from a set of points for a given area-of-interest bounding box. Parameters ---------- xs Column of x-coordinates for each point ys Column of y-coordinates for each point x_min The lower-left x-coordinate of the area of interest bounding box x_max The upper-right x-coordinate of the area of interest bounding box y_min The lower-left y-coordinate of the area of interest bounding box y_max The upper-right y-coordinate of the area of interest bounding box scale Scale to apply to each point's distance from ``(x_min, y_min)`` max_depth Maximum quadtree depth min_size Minimum number of points for a non-leaf quadtree node Returns ------- result : tuple (cudf.Series, cudf.DataFrame) keys_to_points : cudf.Series(dtype=np.int32) A column of sorted keys to original point indices quadtree : cudf.DataFrame A complete quadtree for the set of input points key : cudf.Series(dtype=np.int32) An int32 column of quadrant keys level : cudf.Series(dtype=np.int8) An int8 column of quadtree levels is_quad : cudf.Series(dtype=np.bool_) A boolean column indicating whether the node is a quad or leaf length : cudf.Series(dtype=np.int32) If this is a non-leaf quadrant (i.e. ``is_quad`` is ``True``), this column's value is the number of children in the non-leaf quadrant. Otherwise this column's value is the number of points contained in the leaf quadrant. offset : cudf.Series(dtype=np.int32) If this is a non-leaf quadrant (i.e. ``is_quad`` is ``True``), this column's value is the position of the non-leaf quadrant's first child. Otherwise this column's value is the position of the leaf quadrant's first point. Notes ----- * Swaps ``min_x`` and ``max_x`` if ``min_x > max_x`` * Swaps ``min_y`` and ``max_y`` if ``min_y > max_y`` Examples -------- An example of selecting the ``min_size`` and ``scale`` based on input:: >>> np.random.seed(0) >>> points = cudf.DataFrame({ "x": cudf.Series(np.random.normal(size=120)) * 500, "y": cudf.Series(np.random.normal(size=120)) * 500, }) >>> max_depth = 3 >>> min_size = 50 >>> min_x, min_y, max_x, max_y = (points["x"].min(), points["y"].min(), points["x"].max(), points["y"].max()) >>> scale = max(max_x - min_x, max_y - min_y) // (1 << max_depth) >>> print( "min_size: " + str(min_size) + "\\n" "num_points: " + str(len(points)) + "\\n" "min_x: " + str(min_x) + "\\n" "max_x: " + str(max_x) + "\\n" "min_y: " + str(min_y) + "\\n" "max_y: " + str(max_y) + "\\n" "scale: " + str(scale) + "\\n" ) min_size: 50 num_points: 120 min_x: -1577.4949079170394 max_x: 1435.877311993804 min_y: -1412.7015761122134 max_y: 1492.572387431971 scale: 301.0 >>> key_to_point, quadtree = cuspatial.quadtree_on_points( points["x"], points["y"], min_x, max_x, min_y, max_y, scale, max_depth, min_size ) >>> print(quadtree) key level is_quad length offset 0 0 0 False 15 0 1 1 0 False 27 15 2 2 0 False 12 42 3 3 0 True 4 8 4 4 0 False 5 106 5 6 0 False 6 111 6 9 0 False 2 117 7 12 0 False 1 119 8 12 1 False 22 54 9 13 1 False 18 76 10 14 1 False 9 94 11 15 1 False 3 103 >>> print(key_to_point) 0 63 1 20 2 33 3 66 4 19 ... 115 113 116 3 117 78 118 98 119 24 Length: 120, dtype: int32 """ xs, ys = normalize_point_columns(as_column(xs), as_column(ys)) x_min, x_max, y_min, y_max = ( min(x_min, x_max), max(x_min, x_max), min(y_min, y_max), max(y_min, y_max), ) min_scale = max(x_max - x_min, y_max - y_min) / ((1 << max_depth) + 2) if scale < min_scale: warnings.warn("scale {} is less than required minimum ".format(scale) + "scale {}. Clamping to minimum scale".format(min_scale)) key_to_point, quadtree = cpp_quadtree_on_points( xs, ys, x_min, x_max, y_min, y_max, max(scale, min_scale), max_depth, min_size, ) return Series(key_to_point), DataFrame._from_table(quadtree)