def test_creating_point_rdd(self):
    """Build a PointRDD from the WKT file and check the distinct-point count.

    The fixture file is known to contain exactly 12872 distinct points.
    """
    rdd = PointRDD(self.spark._sc, point_path, 4, FileDataSplitter.WKT, True)
    rdd.analyze()
    count = rdd.countWithoutDuplicates()
    assert count == 12872, f"Point RDD should have 12872 but found {count}"
def test_raw_spatial_rdd_assignment(self):
    """Assigning rawSpatialRDD into an empty PointRDD must preserve its contents."""
    source_rdd = PointRDD(
        self.sc,
        input_location,
        offset,
        splitter,
        True,
        num_partitions,
        StorageLevel.MEMORY_ONLY,
    )
    source_rdd.analyze()

    target_rdd = PointRDD()
    target_rdd.rawSpatialRDD = source_rdd.rawSpatialRDD
    target_rdd.analyze()

    # The copy must agree with the source on distinct count and spatial extent.
    assert target_rdd.countWithoutDuplicates() == source_rdd.countWithoutDuplicates()
    assert target_rdd.boundaryEnvelope == source_rdd.boundaryEnvelope
    # Point geometries have zero area.
    areas = target_rdd.rawSpatialRDD.map(lambda geo: geo.geom.area).collect()
    assert areas[0] == 0.0
    # Carried user data (tab-separated attributes) must survive the assignment.
    user_data = target_rdd.rawSpatialRDD.take(9)[4].getUserData()
    assert user_data == "testattribute0\ttestattribute1\ttestattribute2"
def test_equal_partitioning(self):
    """Equal-grid spatial partitioning must neither lose nor duplicate points.

    Builds a PointRDD, partitions it with ``GridType.EQUALGRID``, and checks
    that the distinct count over the partitioned RDD matches the raw RDD.
    """
    spatial_rdd = PointRDD(
        sparkContext=self.sc,
        InputLocation=input_location,
        Offset=offset,
        splitter=splitter,
        carryInputData=False,
        partitions=10,
        newLevel=StorageLevel.MEMORY_ONLY,
    )
    spatial_rdd.analyze()
    spatial_rdd.spatialPartitioning(GridType.EQUALGRID)
    for envelope in spatial_rdd.grids:
        # f-string instead of "+ str(...)" concatenation, matching the
        # f-string style already used elsewhere in this file; output identical.
        print(f"PointRDD spatial partitioning grids: {envelope}")
    # Partitioned vs. raw distinct counts must agree.
    assert spatial_rdd.countWithoutDuplicates() == spatial_rdd.countWithoutDuplicatesSPRDD()
def test_r_tree_spatial_partitioning(self):
    """R-tree spatial partitioning must neither lose nor duplicate points."""
    rdd = PointRDD(
        sparkContext=self.sc,
        InputLocation=input_location,
        Offset=offset,
        splitter=splitter,
        carryInputData=True,
        partitions=10,
        newLevel=StorageLevel.MEMORY_ONLY,
    )
    rdd.analyze()
    rdd.spatialPartitioning(GridType.RTREE)
    # Emit each partition envelope for debugging.
    for grid_envelope in rdd.grids:
        print(grid_envelope)
    # Distinct count of the partitioned RDD must equal the raw RDD's.
    assert rdd.countWithoutDuplicates() == rdd.countWithoutDuplicatesSPRDD()