示例#1
0
 def test_empty_constructor_test(self):
     """Check that an argument-less PointRDD can adopt another RDD's raw JVM RDD.

     A PointRDD is loaded from the point input file, its ``rawJvmSpatialRDD``
     is assigned to a PointRDD created with no arguments, and ``analyze()``
     is called on the latter.
     """
     source_rdd = PointRDD(
         sparkContext=self.sc,
         InputLocation=point_rdd_input_location,
         Offset=point_rdd_offset,
         splitter=point_rdd_splitter,
         carryInputData=False,
     )

     # Hand the already-loaded JVM RDD to a PointRDD built with no arguments.
     adopted_rdd = PointRDD()
     adopted_rdd.rawJvmSpatialRDD = source_rdd.rawJvmSpatialRDD
     adopted_rdd.analyze()
示例#2
0
 def test_empty_constructor(self):
     """Index a loaded PointRDD, then analyze an argument-less copy of it.

     The copy is a PointRDD created with no arguments whose
     ``rawJvmSpatialRDD`` is taken from the loaded one.
     """
     loaded_rdd = PointRDD(
         sparkContext=self.sc,
         InputLocation=input_location,
         Offset=offset,
         splitter=splitter,
         carryInputData=True,
         partitions=num_partitions,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
     loaded_rdd.buildIndex(IndexType.RTREE, False)

     # Reuse the loaded JVM RDD inside a PointRDD built with no arguments.
     copied_rdd = PointRDD()
     copied_rdd.rawJvmSpatialRDD = loaded_rdd.rawJvmSpatialRDD
     copied_rdd.analyze()
示例#3
0
    def test_spatial_range_query(self):
        """Run the non-indexed range-query loop twice, each time on a fresh PointRDD.

        Each pass loads the point input, persists its raw JVM RDD in memory
        and counts the range-query result ``each_query_loop_times`` times.
        """
        for _pass in range(2):
            object_rdd = PointRDD(
                self.sc,
                point_rdd_input_location,
                point_rdd_offset,
                point_rdd_splitter,
                True,
                StorageLevel.MEMORY_ONLY,
            )
            object_rdd.rawJvmSpatialRDD.persist(StorageLevel.MEMORY_ONLY)
            for _ in range(each_query_loop_times):
                # count() forces the query to run; the value itself is not checked.
                result_size = RangeQuery.SpatialRangeQuery(
                    object_rdd, range_query_window, False, False
                ).count()
    def test_spatial_knn_query_using_index(self):
        """Run the KNN query through an R-tree index ``self.loop_times`` times.

        The index is built on the PointRDD first, and every query is issued
        with the use-index flag enabled so the index is actually exercised.
        Each query must return at least one neighbour.
        """
        point_rdd = PointRDD(self.sc, input_location, offset, splitter, False)
        point_rdd.buildIndex(IndexType.RTREE, False)

        for _ in range(self.loop_times):
            # The last argument is the use-index flag; it must be True here,
            # otherwise the index built above is never used by the query.
            result = KNNQuery.SpatialKnnQuery(point_rdd, self.query_point, self.top_k, True)
            # A length check against -1 can never fail; require a real result.
            assert len(result) > 0
示例#5
0
 def load(cls, sc: SparkContext, path: str) -> SpatialRDD:
     """Load a point spatial RDD from ``path`` and return it wrapped in a PointRDD.

     :param sc: Spark context whose ``_jvm`` and ``_jsc`` handles are used.
     :param path: location passed to the loader adapter.
     :return: a PointRDD whose underlying spatial RDD is the loaded one.
     """
     gateway = sc._jvm
     wrapper = PointRDD()
     loader = SpatialObjectLoaderAdapter(gateway)
     loaded_srdd = loader.load_point_spatial_rdd(sc._jsc, path)
     wrapper.set_srdd(loaded_srdd)
     return wrapper
示例#6
0
    def test_creating_point_rdd(self):
        """Load the WKT point file and check its de-duplicated record count."""
        loaded_points = PointRDD(
            self.spark._sc,
            point_path,
            4,
            FileDataSplitter.WKT,
            True,
        )
        loaded_points.analyze()

        cnt = loaded_points.countWithoutDuplicates()
        assert cnt == 12872, f"Point RDD should have 12872 but found {cnt}"
    def test_point_rdd(self):
        """Convert a PointRDD into DataFrames, with and without an explicit schema.

        Each record becomes a row made of its geometry followed by the
        tab-separated pieces of its user data. The first row's geometry of
        the schema-based DataFrame is compared against a known WKT string.
        """
        points = PointRDD(
            sparkContext=self.sc,
            InputLocation=crs_test_point,
            Offset=0,
            splitter=splitter,
            carryInputData=True,
            partitions=numPartitions,
            newLevel=StorageLevel.MEMORY_ONLY,
        )

        # One row per record: geometry first, then the user-data fields.
        rows = points.rawSpatialRDD.map(
            lambda x: [x.geom, *x.getUserData().split("\t")])

        # Schema-less conversion.
        self.spark.createDataFrame(rows).show()

        # Conversion with an explicit two-column schema.
        geom_name_schema = StructType([
            StructField("geom", GeometryType()),
            StructField("name", StringType()),
        ])
        typed_df = self.spark.createDataFrame(
            rows, geom_name_schema, verifySchema=False)
        typed_df.show()

        first_row = typed_df.take(1)[0]
        assert first_row[0].wkt == "POINT (32.324142 -88.331492)"
    def test_spatial_knn_correctness(self):
        """Check that indexed and non-indexed KNN queries return the same points.

        Both result lists are ordered with ``distance_sorting_functions``
        relative to ``query_point``; the summed pairwise distance between the
        first ``top_k`` entries of each list must be zero.
        """
        point_rdd = PointRDD(
            self.sc, input_location, offset, splitter, True,
            StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005",
        )

        # Query once without an index, then build one and query again.
        plain_result = KNNQuery.SpatialKnnQuery(
            point_rdd, query_point, top_k, False)
        point_rdd.buildIndex(IndexType.RTREE, False)
        indexed_result = KNNQuery.SpatialKnnQuery(
            point_rdd, query_point, top_k, True)

        def by_distance(geo_data):
            return distance_sorting_functions(geo_data, query_point)

        plain_sorted = sorted(plain_result, key=by_distance)
        indexed_sorted = sorted(indexed_result, key=by_distance)

        difference = sum(
            plain_sorted[rank].geom.distance(indexed_sorted[rank].geom)
            for rank in range(top_k)
        )

        assert difference == 0
    def test_spatial_join_query_with_polygon_rdd_using_index(self):
        """Join an indexed, partitioned PointRDD with a PolygonRDD and check user data.

        The point RDD is partitioned and indexed; the polygon RDD is
        partitioned with the point RDD's partitioner. Every matched
        right-hand record in the collected join result must carry user data.
        """
        polygons = PolygonRDD(
            self.sc, input_location_query_polygon, splitter, True,
            num_partitions, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005",
        )
        points = PointRDD(
            self.sc, input_location, offset, splitter, True,
            num_partitions, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005",
        )

        polygons.analyze()
        points.analyze()

        # Partition and index the points first, then align the polygons
        # with the partitioner the points ended up with.
        points.spatialPartitioning(grid_type)
        points.buildIndex(IndexType.RTREE, True)
        polygons.spatialPartitioning(points.getPartitioner())

        result = JoinQuery.SpatialJoinQuery(
            points, polygons, False, True).collect()

        assert result[1][0].getUserData() is not None

        for pair in result:
            matched = pair[1]
            if len(matched) == 0:
                continue
            for right_data in matched:
                assert right_data.getUserData() is not None
 def test_saving_to_disc_index_point(self):
     """Build an R-tree index on a PointRDD and save the indexed RDD as an object file.

     The output goes to ``point_index`` under ``disc_location``.
     """
     from tests.properties.point_properties import input_location, offset, splitter, num_partitions

     indexed_points = PointRDD(
         self.sc, input_location, offset, splitter, True,
         num_partitions, StorageLevel.MEMORY_ONLY,
     )
     indexed_points.buildIndex(IndexType.RTREE, False)

     target_path = os.path.join(disc_location, "point_index")
     indexed_points.indexedRawRDD.saveAsObjectFile(target_path)
    def test_raw_spatial_rdd_assignment(self):
        """Assign ``rawSpatialRDD`` to an argument-less PointRDD and compare it to the source.

        After ``analyze()``, the receiving RDD must report the same
        de-duplicated count and boundary envelope as the source, and its
        records must expose the expected geometry area and user data.
        """
        source_rdd = PointRDD(
            self.sc, input_location, offset, splitter, True,
            num_partitions, StorageLevel.MEMORY_ONLY,
        )
        source_rdd.analyze()

        receiver = PointRDD()
        receiver.rawSpatialRDD = source_rdd.rawSpatialRDD
        receiver.analyze()

        # Aggregates must match the source RDD.
        assert receiver.countWithoutDuplicates() == source_rdd.countWithoutDuplicates()
        assert receiver.boundaryEnvelope == source_rdd.boundaryEnvelope

        # Spot-check individual records.
        areas = receiver.rawSpatialRDD.map(lambda x: x.geom.area).collect()
        assert areas[0] == 0.0

        fifth_record = receiver.rawSpatialRDD.take(9)[4]
        assert fifth_record.getUserData() == "testattribute0\ttestattribute1\ttestattribute2"
示例#12
0
    def test_spatial_knn_query(self):
        """Run a non-indexed KNN query (k=1000) ``each_query_loop_times`` times.

        The raw JVM RDD is persisted in memory before the loop. The query
        result is not inspected.
        """
        object_rdd = PointRDD(
            self.sc,
            point_rdd_input_location,
            point_rdd_offset,
            point_rdd_splitter,
            True,
            StorageLevel.MEMORY_ONLY,
        )
        object_rdd.rawJvmSpatialRDD.persist(StorageLevel.MEMORY_ONLY)

        for _ in range(each_query_loop_times):
            result = KNNQuery.SpatialKnnQuery(
                object_rdd, knn_query_point, 1000, False
            )
 def test_spatial_range_query(self):
     """Run the non-indexed range query ``self.loop_times`` times, expecting 2830 hits."""
     points = PointRDD(self.sc, input_location, offset, splitter, False)

     for _ in range(self.loop_times):
         matches = RangeQuery.SpatialRangeQuery(
             points, self.query_envelope, False, False
         )
         result_size = matches.count()
         assert result_size == 2830
    def test_saving_to_disc_spatial_rdd_point(self):
        """Save a PointRDD's raw JVM RDD as an object file named ``point`` under ``disc_location``."""
        from tests.properties.point_properties import input_location, offset, splitter, num_partitions

        points = PointRDD(
            self.sc, input_location, offset, splitter, True,
            num_partitions, StorageLevel.MEMORY_ONLY,
        )

        target_path = os.path.join(disc_location, "point")
        points.rawJvmSpatialRDD.saveAsObjectFile(target_path)
示例#15
0
    def test_spatial_knn_query(self):
        """Run a non-indexed KNN query ``self.loop_times`` times and check the first hit.

        Each query must return at least one neighbour, and the first
        neighbour must carry user data.
        """
        point_rdd = PointRDD(self.sc, input_location, offset, splitter, False)

        for _ in range(self.loop_times):
            result = KNNQuery.SpatialKnnQuery(point_rdd, self.query_point,
                                              self.top_k, False)
            # A length check against -1 can never fail; require a non-empty
            # result so an empty answer is reported as an assertion failure
            # instead of an IndexError on the next line.
            assert len(result) > 0
            assert result[0].getUserData() is not None
示例#16
0
 def test_crs_transformation_spatial_range_query_using_index(self):
     """Run an indexed range query on a PointRDD built with epsg:4326 -> epsg:3005 codes.

     The index is built and its indexed raw RDD persisted in memory before
     the query loop. Each iteration counts the query result.
     """
     transformed_rdd = PointRDD(
         self.sc,
         point_rdd_input_location,
         point_rdd_offset,
         point_rdd_splitter,
         True,
         StorageLevel.MEMORY_ONLY,
         "epsg:4326",
         "epsg:3005",
     )
     transformed_rdd.buildIndex(point_rdd_index_type, False)
     transformed_rdd.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY)

     for _ in range(each_query_loop_times):
         matches = RangeQuery.SpatialRangeQuery(
             transformed_rdd, range_query_window, False, True
         )
         result_size = matches.count()
         assert result_size > -1
示例#17
0
 def test_knn_query(self):
     """Run a non-indexed KNN query (k=1000) ``each_query_loop_times`` times.

     The query result is not inspected.
     """
     points = PointRDD(
         sparkContext=self.sc,
         InputLocation=point_rdd_input_location,
         Offset=point_rdd_offset,
         splitter=point_rdd_splitter,
         carryInputData=False,
     )

     for _ in range(each_query_loop_times):
         result = KNNQuery.SpatialKnnQuery(points, knn_query_point, 1000, False)
示例#18
0
 def create_spatial_rdd(self):
     """Build and return a PointRDD from ``input_file_location``.

     :return: the PointRDD, created with input data carried along,
         ``numPartitions`` partitions and MEMORY_ONLY storage level.
     """
     return PointRDD(
         sparkContext=self.sc,
         InputLocation=input_file_location,
         Offset=offset,
         splitter=splitter,
         carryInputData=True,
         partitions=numPartitions,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
示例#19
0
    def test_spatial_knn_query_using_index(self):
        """Run an indexed KNN query (k=1000) ``each_query_loop_times`` times.

        The index is built and the indexed raw RDD persisted in memory
        before the loop. The query result is not inspected.
        """
        indexed_points = PointRDD(
            self.sc,
            point_rdd_input_location,
            point_rdd_offset,
            point_rdd_splitter,
            True,
            StorageLevel.MEMORY_ONLY,
        )
        indexed_points.buildIndex(point_rdd_index_type, False)
        indexed_points.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY)

        for _ in range(each_query_loop_times):
            result = KNNQuery.SpatialKnnQuery(
                indexed_points, knn_query_point, 1000, True
            )
示例#20
0
 def test_build_index_without_set_grid(self):
     """Build an R-tree index on a PointRDD without calling spatialPartitioning first."""
     points = PointRDD(
         sparkContext=self.sc,
         InputLocation=input_location,
         Offset=offset,
         splitter=splitter,
         carryInputData=True,
         partitions=num_partitions,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
     points.buildIndex(IndexType.RTREE, False)
示例#21
0
    def test_spatial_knn_query(self):
        """Run a non-indexed KNN query on a PointRDD built with epsg:4326 -> epsg:3005 codes.

        Each of the ``loop_times`` queries must return a non-empty result
        whose first element carries user data.
        """
        transformed_points = PointRDD(
            self.sc,
            input_location,
            offset,
            splitter,
            True,
            StorageLevel.MEMORY_ONLY,
            "epsg:4326",
            "epsg:3005",
        )

        for _ in range(loop_times):
            neighbours = KNNQuery.SpatialKnnQuery(
                transformed_points, query_point, top_k, False
            )
            assert len(neighbours) > 0
            assert neighbours[0].getUserData() is not None
示例#22
0
    def test_save_as_geo_json_with_data(self, remove_wkb_directory):
        """Save a PointRDD as GeoJSON, reload it, and compare record counts.

        The output location is the module-level ``test_save_as_wkb_with_data``
        path; the reloaded RDD is read with the GEOJSON splitter.
        """
        original_rdd = PointRDD(
            sparkContext=self.sc,
            InputLocation=inputLocation,
            Offset=offset,
            splitter=splitter,
            carryInputData=True,
            partitions=numPartitions,
            newLevel=StorageLevel.MEMORY_ONLY,
        )
        original_rdd.saveAsGeoJSON(test_save_as_wkb_with_data)

        reloaded_rdd = PointRDD(
            sparkContext=self.sc,
            InputLocation=test_save_as_wkb_with_data,
            splitter=FileDataSplitter.GEOJSON,
            carryInputData=True,
            partitions=numPartitions,
            newLevel=StorageLevel.MEMORY_ONLY,
        )

        reloaded_count = reloaded_rdd.rawSpatialRDD.count()
        original_count = original_rdd.rawSpatialRDD.count()
        assert reloaded_count == original_count
示例#23
0
 def test_range_query_using_index(self):
     """Run an indexed range query ``each_query_loop_times`` times and count the hits.

     The index is built on the PointRDD before the loop, and each query is
     issued with the use-index flag enabled.
     """
     object_rdd = PointRDD(sparkContext=self.sc,
                           InputLocation=point_rdd_input_location,
                           Offset=point_rdd_offset,
                           splitter=point_rdd_splitter,
                           carryInputData=False)
     object_rdd.buildIndex(point_rdd_index_type, False)
     for _ in range(each_query_loop_times):
         # count must be *called*: a bare ``.count`` only fetches the bound
         # method, so the query would never be executed.
         result_size = RangeQuery.SpatialRangeQuery(object_rdd,
                                                    range_query_window,
                                                    False, True).count()
示例#24
0
    def test_spatial_range_query(self):
        """Run the non-indexed range query ``each_query_loop_times`` times and log each count."""
        points = PointRDD(
            sparkContext=self.sc,
            InputLocation=point_rdd_input_location,
            Offset=point_rdd_offset,
            splitter=point_rdd_splitter,
            carryInputData=False,
        )

        for _ in range(each_query_loop_times):
            matches = RangeQuery.SpatialRangeQuery(
                points, range_query_window, False, False
            )
            result_size = matches.count()
            logging.info(result_size)
示例#25
0
    def test_equal_partitioning(self):
        """Check that QUADTREE partitioning keeps the de-duplicated record count.

        The count on the raw RDD must equal the count on the spatially
        partitioned RDD.
        """
        points = PointRDD(
            sparkContext=self.sc,
            InputLocation=input_location,
            Offset=offset,
            splitter=splitter,
            carryInputData=False,
            partitions=10,
            newLevel=StorageLevel.MEMORY_ONLY,
        )
        points.analyze()
        points.spatialPartitioning(GridType.QUADTREE)

        raw_count = points.countWithoutDuplicates()
        partitioned_count = points.countWithoutDuplicatesSPRDD()
        assert raw_count == partitioned_count
示例#26
0
 def test_crs_transformed_spatial_range_query(self):
     """Run a non-indexed range query on a PointRDD built with epsg:4326 -> epsg:3005 codes.

     The query is repeated ``each_query_loop_times`` times and its result is
     counted on every iteration so the query is actually evaluated.
     """
     object_rdd = PointRDD(sparkContext=self.sc,
                           InputLocation=point_rdd_input_location,
                           Offset=point_rdd_offset,
                           splitter=point_rdd_splitter,
                           carryInputData=False,
                           newLevel=StorageLevel.DISK_ONLY,
                           sourceEpsgCRSCode="epsg:4326",
                           targetEpsgCode="epsg:3005")
     for _ in range(each_query_loop_times):
         # Without an action such as count() the returned RDD is never
         # evaluated and the test exercises nothing beyond building the query.
         result_size = RangeQuery.SpatialRangeQuery(object_rdd,
                                                    range_query_window,
                                                    False, False).count()
示例#27
0
    def test_get_crs_transformation(self):
        """Check that ``getCRStransformation()`` flips from falsy to truthy after ``CRSTransform``."""
        points = PointRDD(
            sparkContext=self.sc,
            InputLocation=crs_test_point,
            Offset=0,
            splitter=splitter,
            carryInputData=True,
            partitions=numPartitions,
            newLevel=StorageLevel.MEMORY_ONLY,
        )

        # No transformation has been applied yet.
        assert not points.getCRStransformation()

        points.CRSTransform("epsg:4326", "epsg:3857")
        assert points.getCRStransformation()
示例#28
0
    def test_spatial_range_query_using_index(self):
        """Run the range query through an R-tree index and verify count and user data.

        The index is built first and every query is issued with the
        use-index flag enabled, so the indexed code path is what gets tested.
        Each of the ``self.loop_times`` queries must return 2830 records, and
        the second of the first ten results must carry user data.
        """
        spatial_rdd = PointRDD(self.sc, input_location, offset, splitter, False)

        spatial_rdd.buildIndex(IndexType.RTREE, False)

        for _ in range(self.loop_times):
            # The last argument is the use-index flag; it must be True here,
            # otherwise the index built above is never used by the query.
            result_size = RangeQuery.SpatialRangeQuery(
                spatial_rdd, self.query_envelope, False, True
            ).count()
            assert result_size == 2830

        first_ten = RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False, True
        ).take(10)
        assert first_ten[1].getUserData() is not None
示例#29
0
    def test_indexed_rdd_assignment(self):
        """Run a distance-join loop twice on freshly built PointRDD / CircleRDD pairs.

        Both passes partition the point RDD with a QUADTREE grid, build a
        QUADTREE index, partition the circle RDD with the point RDD's
        partitioner and then build an RTREE index. Only the first pass
        persists and counts the indexed / partitioned RDDs before its join
        loop. Start times are recorded around the loops, but no timing value
        is asserted on or reported.
        """
        object_rdd = PointRDD(
            self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True)
        # 0.1 is presumably the circle radius -- TODO confirm against CircleRDD.
        query_window_rdd = CircleRDD(object_rdd, 0.1)
        object_rdd.analyze()
        object_rdd.spatialPartitioning(GridType.QUADTREE)
        object_rdd.buildIndex(IndexType.QUADTREE, True)

        # The query windows reuse the partitioner the point RDD ended up with.
        query_window_rdd.spatialPartitioning(object_rdd.getPartitioner())

        object_rdd.buildIndex(IndexType.RTREE, False)

        # First pass only: persist both sides and call count() on each
        # before the join loop starts.
        object_rdd.indexedRDD.persist(StorageLevel.MEMORY_ONLY)
        query_window_rdd.jvmSpatialPartitionedRDD.persist(StorageLevel.MEMORY_ONLY)
        query_window_rdd.jvmSpatialPartitionedRDD.count()
        object_rdd.indexedRDD.count()

        import time

        start = time.time()
        for _ in range(each_query_loop_times):
            result_size = JoinQuery.DistanceJoinQuery(object_rdd, query_window_rdd, True, True).count()
        # NOTE(review): diff is computed but never read in this block.
        diff = time.time() - start

        # Second pass: same construction on fresh RDDs, without the
        # persist/count step.
        object_rdd = PointRDD(
            self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True)
        query_window_rdd = CircleRDD(object_rdd, 0.1)

        object_rdd.analyze()
        object_rdd.spatialPartitioning(GridType.QUADTREE)
        object_rdd.buildIndex(IndexType.QUADTREE, True)

        query_window_rdd.spatialPartitioning(object_rdd.getPartitioner())

        object_rdd.buildIndex(IndexType.RTREE, False)

        # NOTE(review): start1 is recorded but never read in this block.
        start1 = time.time()
        for _ in range(each_query_loop_times):
            result_size = JoinQuery.DistanceJoinQuery(object_rdd, query_window_rdd, True, True).count()
示例#30
0
    def test_crs_transform(self):
        """Apply ``CRSTransform`` from epsg:4326 to epsg:3857 and check the first point's WKT."""
        points = PointRDD(
            sparkContext=self.sc,
            InputLocation=crs_test_point,
            Offset=0,
            splitter=splitter,
            carryInputData=True,
            partitions=numPartitions,
            newLevel=StorageLevel.MEMORY_ONLY,
        )

        points.CRSTransform("epsg:4326", "epsg:3857")

        first_record = points.rawSpatialRDD.collect()[0]
        assert first_record.geom.wkt == "POINT (-9833016.710450118 3805934.914254189)"