def SpatialJoinQueryFlat(cls, spatialRDD: SpatialRDD, queryRDD: SpatialRDD, useIndex: bool,
                         considerBoundaryIntersection: bool) -> RDD:
    """
    Run a flat spatial join between two SpatialRDDs and return the result as an RDD.

    The join is delegated to JoinQueryRaw.SpatialJoinQueryFlat with the
    arguments passed through unchanged; the object it returns is converted
    with ``to_rdd()`` before being handed back to the caller.

    :param spatialRDD: SpatialRDD
    :param queryRDD: SpatialRDD
    :param useIndex: bool, forwarded to the raw join
    :param considerBoundaryIntersection: bool, forwarded to the raw join
    :return: RDD whose elements are pairs [GeoData, GeoData]

    >> spatial_join_result = JoinQuery.SpatialJoinQueryFlat(
    >>      spatialRDD, queryRDD, useIndex, considerBoundaryIntersection
    >> )
    >> spatial_join_result.collect()
    [[GeoData(Polygon, ), GeoData()], [GeoData(), GeoData()], [GeoData(), GeoData()]]
    """
    return JoinQueryRaw.SpatialJoinQueryFlat(
        spatialRDD, queryRDD, useIndex, considerBoundaryIntersection
    ).to_rdd()
def test_spatial_join_query_flat_to_df(self):
    """
    Flat spatial join converted to a DataFrame via Adapter.toDf.

    Checks the row count, the column names (default and user supplied),
    the default schema, and the WKT of the joined geometries.
    """
    points = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
    areas = WktReader.readToGeometryRDD(self.sc, areas_csv_path, 1, False, False)
    points.analyze()
    areas.analyze()

    # The polygon RDD is partitioned with the partitioner built for the points.
    points.spatialPartitioning(GridType.QUADTREE)
    areas.spatialPartitioning(points.getPartitioner())

    raw_join = JoinQueryRaw.SpatialJoinQueryFlat(points, areas, False, True)

    default_names_df = Adapter.toDf(raw_join, self.spark)
    assert default_names_df.count() == 5

    custom_names_df = Adapter.toDf(
        raw_join, ["area_id", "area_name"], ["poi_id", "poi_name"], self.spark
    )
    assert custom_names_df.count() == 5
    assert custom_names_df.columns == [
        "leftgeometry", "area_id", "area_name",
        "rightgeometry", "poi_id", "poi_name",
    ]

    expected_schema = StructType([
        StructField("leftgeometry", GeometryType()),
        StructField("rightgeometry", GeometryType()),
    ])
    assert default_names_df.schema == expected_schema

    left_rows = default_names_df.selectExpr("ST_AsText(leftgeometry)").collect()
    left_geometries = self.__row_to_list(left_rows)
    right_rows = default_names_df.selectExpr("ST_AsText(rightgeometry)").collect()
    right_geometries = self.__row_to_list(right_rows)

    # One polygon is listed twice; set() collapses it, so only the distinct
    # WKT strings are compared and ordering is ignored.
    expected_polygons = [
        'POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))',
        'POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))',
        'POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))',
        'POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))',
        'POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))',
    ]
    expected_points = [
        'POINT (-3 5)',
        'POINT (11 5)',
        'POINT (4 3)',
        'POINT (-1 -1)',
        'POINT (-4 -5)',
    ]
    assert {row[0] for row in left_geometries} == set(expected_polygons)
    assert {row[0] for row in right_geometries} == set(expected_points)
def test_spatial_join_to_spatial_rdd(self):
    """JoinQueryRaw.spatialJoin, converted with to_rdd(), collects to 5 elements."""
    poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
    areas_polygon_rdd = WktReader.readToGeometryRDD(self.sc, areas_csv_path, 1, False, False)
    poi_point_rdd.analyze()
    areas_polygon_rdd.analyze()

    # The polygon RDD is partitioned with the partitioner built for the points.
    poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
    areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

    jvm_sedona_rdd = JoinQueryRaw.spatialJoin(
        poi_point_rdd, areas_polygon_rdd, JoinParams(considerBoundaryIntersection=True)
    )

    # collect() has already materialised the result, so this is a plain
    # collection rather than an RDD; use len() instead of calling __len__().
    joined_pairs = jvm_sedona_rdd.to_rdd().collect()
    assert len(joined_pairs) == 5
def test_spatial_join_query_flat_to_df(self):
    """
    JoinQueryRaw.SpatialJoinQueryFlat, converted with to_rdd(), collects to 5 elements.

    NOTE(review): the name says "to_df" but the assertion is on to_rdd();
    a test with this exact name also exists elsewhere. If both live in the
    same class the later definition shadows the earlier one - confirm.
    """
    poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
    areas_polygon_rdd = WktReader.readToGeometryRDD(self.sc, areas_csv_path, 1, False, False)
    poi_point_rdd.analyze()
    areas_polygon_rdd.analyze()

    # The polygon RDD is partitioned with the partitioner built for the points.
    poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
    areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

    jvm_sedona_rdd = JoinQueryRaw.SpatialJoinQueryFlat(
        poi_point_rdd, areas_polygon_rdd, False, True
    )

    # Use the len() builtin rather than calling the __len__ dunder directly.
    joined_pairs = jvm_sedona_rdd.to_rdd().collect()
    assert len(joined_pairs) == 5
def test_distance_join_query_flat_to_df(self):
    """
    JoinQueryRaw.DistanceJoinQueryFlat, converted with to_rdd(), collects to 10 elements.

    NOTE(review): the name says "to_df" but the assertion is on to_rdd();
    a test with this exact name also exists elsewhere. If both live in the
    same class the later definition shadows the earlier one - confirm.
    """
    poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
    # Circles are built from the same points with the value 2.0.
    circle_rdd = CircleRDD(poi_point_rdd, 2.0)

    circle_rdd.analyze()
    poi_point_rdd.analyze()

    # The circle RDD is partitioned with the partitioner built for the points.
    poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
    circle_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

    jvm_sedona_rdd = JoinQueryRaw.DistanceJoinQueryFlat(poi_point_rdd, circle_rdd, False, True)

    # Use the len() builtin rather than calling the __len__ dunder directly.
    joined_pairs = jvm_sedona_rdd.to_rdd().collect()
    assert len(joined_pairs) == 10
def spatialJoin(cls, queryWindowRDD: SpatialRDD, objectRDD: SpatialRDD, joinParams: JoinParams) -> RDD:
    """
    Run JoinQueryRaw.spatialJoin and return its result converted with ``to_rdd()``.

    All three arguments are forwarded to the raw join unchanged.

    :param queryWindowRDD: SpatialRDD
    :param objectRDD: SpatialRDD
    :param joinParams: JoinParams
    :return: RDD produced by calling ``to_rdd()`` on the raw join result
    """
    return JoinQueryRaw.spatialJoin(queryWindowRDD, objectRDD, joinParams).to_rdd()
def DistanceJoinQuery(cls, spatialRDD: SpatialRDD, queryRDD: SpatialRDD, useIndex: bool,
                      considerBoundaryIntersection: bool) -> RDD:
    """
    Run JoinQueryRaw.DistanceJoinQuery and return its result converted with ``to_rdd()``.

    All four arguments are forwarded to the raw join unchanged.

    :param spatialRDD: SpatialRDD
    :param queryRDD: SpatialRDD
    :param useIndex: bool
    :param considerBoundaryIntersection: bool
    :return: RDD produced by calling ``to_rdd()`` on the raw join result
    """
    return JoinQueryRaw.DistanceJoinQuery(
        spatialRDD, queryRDD, useIndex, considerBoundaryIntersection
    ).to_rdd()
def DistanceJoinQueryFlat(cls, spatialRDD: SpatialRDD, queryRDD: SpatialRDD, useIndex: bool,
                          considerBoundaryIntersection: bool) -> RDD:
    """
    Run JoinQueryRaw.DistanceJoinQueryFlat and return its result converted with ``to_rdd()``.

    All four arguments are forwarded to the raw join unchanged.

    :param spatialRDD: SpatialRDD
    :param queryRDD: SpatialRDD
    :param useIndex: bool
    :param considerBoundaryIntersection: bool
    :return: RDD produced by calling ``to_rdd()`` on the raw join result

    >> spatial_rdd = ...  # a SpatialRDD
    >> query_rdd = ...  # a SpatialRDD
    >> spatial_join_result = JoinQuery.DistanceJoinQueryFlat(spatial_rdd, query_rdd, True, True)
    >> spatial_join_result.collect()
    [GeoData(), GeoData()]
    """
    return JoinQueryRaw.DistanceJoinQueryFlat(
        spatialRDD, queryRDD, useIndex, considerBoundaryIntersection
    ).to_rdd()
def test_distance_join_query_flat_to_df(self):
    """
    Flat distance join converted to a DataFrame via Adapter.toDf.

    Checks that 10 rows come back and that the user supplied field names
    are placed after their respective geometry columns.
    """
    points = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
    # Circles are built from the same points with the value 2.0.
    circles = CircleRDD(points, 2.0)

    circles.analyze()
    points.analyze()

    # The circle RDD is partitioned with the partitioner built for the points.
    points.spatialPartitioning(GridType.QUADTREE)
    circles.spatialPartitioning(points.getPartitioner())

    raw_join = JoinQueryRaw.DistanceJoinQueryFlat(points, circles, False, True)

    left_fields = ["poi_from_id", "poi_from_name"]
    right_fields = ["poi_to_id", "poi_to_name"]
    joined_df = Adapter.toDf(raw_join, left_fields, right_fields, self.spark)

    assert joined_df.count() == 10
    assert joined_df.columns == [
        "leftgeometry", "poi_from_id", "poi_from_name",
        "rightgeometry", "poi_to_id", "poi_to_name",
    ]
def test_spatial_join_to_df(self):
    """
    JoinQueryRaw.spatialJoin with default JoinParams, converted to a DataFrame.

    Checks that 5 rows come back and that the user supplied field names
    are placed after their respective geometry columns.
    """
    points = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
    areas = WktReader.readToGeometryRDD(self.sc, areas_csv_path, 1, False, False)
    points.analyze()
    areas.analyze()

    # The polygon RDD is partitioned with the partitioner built for the points.
    points.spatialPartitioning(GridType.QUADTREE)
    areas.spatialPartitioning(points.getPartitioner())

    raw_join = JoinQueryRaw.spatialJoin(points, areas, JoinParams())

    left_fields = ["area_id", "area_name"]
    right_fields = ["poi_id", "poi_name"]
    joined_df = Adapter.toDf(raw_join, left_fields, right_fields, self.spark)

    assert joined_df.count() == 5
    assert joined_df.columns == [
        "leftgeometry", "area_id", "area_name",
        "rightgeometry", "poi_id", "poi_name",
    ]