Exemplo n.º 1
0
    def SpatialJoinQueryFlat(cls, spatialRDD: SpatialRDD, queryRDD: SpatialRDD,
                             useIndex: bool,
                             considerBoundaryIntersection: bool) -> RDD:
        """
        Function takes SpatialRDD and other SpatialRDD and based on two parameters
        - useIndex
        - considerBoundaryIntersection
        creates RDD with result of Spatial Join operation. It Returns RDD[GeoData, GeoData]

        :param spatialRDD: SpatialRDD
        :param queryRDD: SpatialRDD
        :param useIndex: bool
        :param considerBoundaryIntersection: bool
        :return: RDD

        >> spatial_join_result = JoinQuery.SpatialJoinQueryFlat(
        >>      spatialRDD, queryRDD, useIndex, considerBoundaryIntersection
        >> )
        >> spatial_join_result.collect()
        [[GeoData(Polygon, ), GeoData()], [GeoData(), GeoData()], [GeoData(), GeoData()]]
        """

        pair_rdd = JoinQueryRaw.SpatialJoinQueryFlat(
            spatialRDD, queryRDD, useIndex, considerBoundaryIntersection)
        return pair_rdd.to_rdd()
Exemplo n.º 2
0
    def test_spatial_join_query_flat_to_df(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1,
                                                    False, False)
        areas_polygon_rdd = WktReader.readToGeometryRDD(
            self.sc, areas_csv_path, 1, False, False)
        poi_point_rdd.analyze()
        areas_polygon_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

        jvm_sedona_rdd = JoinQueryRaw.SpatialJoinQueryFlat(
            poi_point_rdd, areas_polygon_rdd, False, True)

        pois_within_areas_with_default_column_names = Adapter.toDf(
            jvm_sedona_rdd, self.spark)

        assert pois_within_areas_with_default_column_names.count() == 5

        pois_within_areas_with_passed_column_names = Adapter.toDf(
            jvm_sedona_rdd, ["area_id", "area_name"], ["poi_id", "poi_name"],
            self.spark)

        assert pois_within_areas_with_passed_column_names.count() == 5

        assert pois_within_areas_with_passed_column_names.columns == [
            "leftgeometry", "area_id", "area_name", "rightgeometry", "poi_id",
            "poi_name"
        ]

        assert pois_within_areas_with_default_column_names.schema == StructType(
            [
                StructField("leftgeometry", GeometryType()),
                StructField("rightgeometry", GeometryType()),
            ])

        left_geometries_raw = pois_within_areas_with_default_column_names. \
            selectExpr("ST_AsText(leftgeometry)"). \
            collect()

        left_geometries = self.__row_to_list(left_geometries_raw)

        right_geometries_raw = pois_within_areas_with_default_column_names. \
            selectExpr("ST_AsText(rightgeometry)"). \
            collect()

        right_geometries = self.__row_to_list(right_geometries_raw)

        # Ignore the ordering of these
        assert set(geom[0] for geom in left_geometries) == set([
            'POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))',
            'POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))',
            'POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))',
            'POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))',
            'POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))'
        ])
        assert set(geom[0] for geom in right_geometries) == set([
            'POINT (-3 5)', 'POINT (11 5)', 'POINT (4 3)', 'POINT (-1 -1)',
            'POINT (-4 -5)'
        ])
    def test_spatial_join_to_spatial_rdd(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
        areas_polygon_rdd = WktReader.readToGeometryRDD(self.sc, areas_csv_path, 1, False, False)
        poi_point_rdd.analyze()
        areas_polygon_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

        jvm_sedona_rdd = JoinQueryRaw.spatialJoin(poi_point_rdd, areas_polygon_rdd, JoinParams(considerBoundaryIntersection=True))
        sedona_rdd = jvm_sedona_rdd.to_rdd().collect()
        assert sedona_rdd.__len__() == 5
    def test_spatial_join_query_flat_to_df(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
        areas_polygon_rdd = WktReader.readToGeometryRDD(self.sc, areas_csv_path, 1, False, False)
        poi_point_rdd.analyze()
        areas_polygon_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

        jvm_sedona_rdd = JoinQueryRaw.SpatialJoinQueryFlat(
            poi_point_rdd, areas_polygon_rdd, False, True)

        assert jvm_sedona_rdd.to_rdd().collect().__len__() == 5
    def test_distance_join_query_flat_to_df(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
        circle_rdd = CircleRDD(poi_point_rdd, 2.0)

        circle_rdd.analyze()
        poi_point_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        circle_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

        jvm_sedona_rdd = JoinQueryRaw.DistanceJoinQueryFlat(poi_point_rdd, circle_rdd, False, True)

        assert jvm_sedona_rdd.to_rdd().collect().__len__() == 10
Exemplo n.º 6
0
    def spatialJoin(cls, queryWindowRDD: SpatialRDD, objectRDD: SpatialRDD,
                    joinParams: JoinParams) -> RDD:
        """

        :param queryWindowRDD: SpatialRDD
        :param objectRDD: SpatialRDD
        :param joinParams: JoinParams
        :return:
        """

        pair_rdd = JoinQueryRaw.spatialJoin(queryWindowRDD, objectRDD,
                                            joinParams)
        return pair_rdd.to_rdd()
Exemplo n.º 7
0
    def DistanceJoinQuery(cls, spatialRDD: SpatialRDD, queryRDD: SpatialRDD,
                          useIndex: bool,
                          considerBoundaryIntersection: bool) -> RDD:
        """

        :param spatialRDD: SpatialRDD
        :param queryRDD: SpatialRDD
        :param useIndex: bool
        :param considerBoundaryIntersection: bool
        :return:
        """

        pair_rdd = JoinQueryRaw.DistanceJoinQuery(
            spatialRDD, queryRDD, useIndex, considerBoundaryIntersection)
        return pair_rdd.to_rdd()
Exemplo n.º 8
0
    def DistanceJoinQueryFlat(cls, spatialRDD: SpatialRDD,
                              queryRDD: SpatialRDD, useIndex: bool,
                              considerBoundaryIntersection: bool) -> RDD:
        """

        :param spatialRDD: SpatialRDD
        :param queryRDD: SpatialRDD
        :param useIndex: bool
        :param considerBoundaryIntersection: bool

        >> spatial_rdd =
        >> query_rdd =
        >> spatial_join_result = JoinQuery.DistanceJoinQueryFlat(spatial_rdd, query_rdd, True, True)
        >> spatial_join_result.collect()
        [GeoData(), GeoData()]
        :return:
        """

        pair_rdd = JoinQueryRaw.DistanceJoinQueryFlat(
            spatialRDD, queryRDD, useIndex, considerBoundaryIntersection)
        return pair_rdd.to_rdd()
    def test_distance_join_query_flat_to_df(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1,
                                                    False, False)
        circle_rdd = CircleRDD(poi_point_rdd, 2.0)

        circle_rdd.analyze()
        poi_point_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        circle_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

        jvm_sedona_rdd = JoinQueryRaw.DistanceJoinQueryFlat(
            poi_point_rdd, circle_rdd, False, True)
        df_sedona_rdd = Adapter.toDf(jvm_sedona_rdd,
                                     ["poi_from_id", "poi_from_name"],
                                     ["poi_to_id", "poi_to_name"], self.spark)

        assert df_sedona_rdd.count() == 10
        assert df_sedona_rdd.columns == [
            "leftgeometry", "poi_from_id", "poi_from_name", "rightgeometry",
            "poi_to_id", "poi_to_name"
        ]
    def test_spatial_join_to_df(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1,
                                                    False, False)
        areas_polygon_rdd = WktReader.readToGeometryRDD(
            self.sc, areas_csv_path, 1, False, False)
        poi_point_rdd.analyze()
        areas_polygon_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

        jvm_sedona_rdd = JoinQueryRaw.spatialJoin(poi_point_rdd,
                                                  areas_polygon_rdd,
                                                  JoinParams())
        sedona_df = Adapter.toDf(jvm_sedona_rdd, ["area_id", "area_name"],
                                 ["poi_id", "poi_name"], self.spark)

        assert sedona_df.count() == 5
        assert sedona_df.columns == [
            "leftgeometry", "area_id", "area_name", "rightgeometry", "poi_id",
            "poi_name"
        ]