Пример #1
0
    def dynamic_rtree_int(self, query_rdd, num_partitions, use_legacy_apis,
                          grid_type, index_type, expected_count):
        spatial_rdd = self.create_point_rdd(input_location, splitter,
                                            num_partitions)

        self.partition_rdds(query_rdd, spatial_rdd, grid_type, use_legacy_apis)
        join_params = JoinParams(True, index_type, JoinBuildSide.LEFT)
        results = JoinQuery.spatialJoin(query_rdd, spatial_rdd,
                                        join_params).collect()

        self.sanity_check_flat_join_results(results)

        assert expected_count == results.__len__()
    def test_dynamic_index_int(self, num_partitions, use_legacy_apis, grid_type, index_type, intersects):
        query_rdd = self.create_polygon_rdd(query_polygon_set, splitter, num_partitions)
        spatial_rdd = self.create_polygon_rdd(input_location, splitter, num_partitions)

        self.partition_rdds(query_rdd, spatial_rdd, grid_type, use_legacy_apis)

        join_params = JoinParams(intersects, index_type, JoinBuildSide.LEFT)
        result = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()

        self.sanity_check_flat_join_results(result)

        expected_count = self.get_expected_with_original_duplicates_count(intersects) \
            if self.expect_to_preserve_original_duplicates(grid_type) else self.get_expected_count(intersects)
        assert expected_count == result.__len__()
Пример #3
0
    def test_spatial_join_query_and_build_index_on_polygons_on_the_fly(self):
        query_window_rdd = PolygonRDD(self.sc, polygon_rdd_input_location,
                                      polygon_rdd_start_offset,
                                      polygon_rdd_end_offset,
                                      polygon_rdd_splitter, True)

        object_rdd = PointRDD(sparkContext=self.sc,
                              InputLocation=point_rdd_input_location,
                              Offset=point_rdd_offset,
                              splitter=point_rdd_splitter,
                              carryInputData=False)
        object_rdd.analyze()
        object_rdd.spatialPartitioning(join_query_partitionin_type)
        query_window_rdd.spatialPartitioning(object_rdd.getPartitioner())

        for i in range(each_query_loop_times):
            join_params = JoinParams(False, polygon_rdd_index_type,
                                     JoinBuildSide.LEFT)
            resultSize = JoinQuery.spatialJoin(query_window_rdd, object_rdd,
                                               join_params).count()