def test_spatial_join_query_with_polygon_rdd_using_index(self):
        """Join a PointRDD against a PolygonRDD and check user data survives.

        Both RDDs are created with the "epsg:4326" / "epsg:3005" code pair.
        The point RDD is spatially partitioned and given an R-tree index, the
        polygon RDD reuses the point RDD's partitioner, and every geometry in
        the collected join result must carry non-None user data.
        """
        polygon_windows = PolygonRDD(
            self.sc, input_location_query_polygon, splitter, True,
            num_partitions, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005"
        )
        points = PointRDD(
            self.sc, input_location, offset, splitter, True, num_partitions,
            StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005"
        )

        polygon_windows.analyze()
        points.analyze()

        # Partition and index the points first so the polygons can reuse the
        # very same partitioner.
        points.spatialPartitioning(grid_type)
        points.buildIndex(IndexType.RTREE, True)
        polygon_windows.spatialPartitioning(points.getPartitioner())

        # NOTE(review): the third positional argument is False although an
        # index was just built and the test name says "using_index" -- confirm
        # whether this is the use-index flag.
        joined = JoinQuery.SpatialJoinQuery(
            points, polygon_windows, False, True
        ).collect()

        assert joined[1][0].getUserData() is not None

        for pair in joined:
            matches = pair[1]
            if len(matches) == 0:
                continue
            for matched_geometry in matches:
                assert matched_geometry.getUserData() is not None
Exemplo n.º 2
0
 def test_build_index_without_set_grid(self):
     """Build an R-tree index on a PolygonRDD that was never partitioned.

     No spatialPartitioning call precedes buildIndex; the test passes as
     long as neither analyze() nor buildIndex() raises.
     """
     unpartitioned_rdd = PolygonRDD(
         self.sc,
         input_location,
         FileDataSplitter.CSV,
         carryInputData=True,
         partitions=num_partitions,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
     unpartitioned_rdd.analyze()
     unpartitioned_rdd.buildIndex(IndexType.RTREE, False)
Exemplo n.º 3
0
    def test_creating_polygon_rdd(self):
        """Load the counties file as a WKT PolygonRDD and check its size.

        After analyze(), countWithoutDuplicates() must report 407 polygons.
        """
        counties_rdd = PolygonRDD(
            self.spark._sc, counties_path, 2, 3, FileDataSplitter.WKT, True
        )
        counties_rdd.analyze()

        cnt = counties_rdd.countWithoutDuplicates()
        assert cnt == 407, f"Polygon RDD should have 407 but found {cnt}"
Exemplo n.º 4
0
    def test_geojson_to_dataframe(self):
        """Convert a GeoJSON-backed PolygonRDD to a DataFrame and check a column.

        The RDD is loaded from ``geojson_input_location``, analyzed, converted
        with ``Adapter.toDf`` and displayed; the second column of the resulting
        DataFrame must be named "STATEFP".
        """
        spatial_rdd = PolygonRDD(self.spark.sparkContext,
                                 geojson_input_location,
                                 FileDataSplitter.GEOJSON, True)

        spatial_rdd.analyze()

        # Convert once and reuse the DataFrame for both the preview and the
        # assertion instead of running Adapter.toDf twice.
        df = Adapter.toDf(spatial_rdd, self.spark)
        df.show()

        assert df.columns[1] == "STATEFP"
Exemplo n.º 5
0
    def test_geojson_to_dataframe(self):
        """Convert a GeoJSON PolygonRDD to a DataFrame with a parsed geometry.

        The "geometry" column is replaced by the result of the SQL expression
        ``ST_GeomFromWKT(geometry)``; the second column must still be "STATEFP".
        """
        geojson_rdd = PolygonRDD(
            self.spark.sparkContext,
            geojson_input_location,
            FileDataSplitter.GEOJSON,
            True,
        )
        geojson_rdd.analyze()

        raw_df = Adapter.toDf(geojson_rdd, self.spark)
        df = raw_df.withColumn("geometry", expr("ST_GeomFromWKT(geometry)"))
        df.show()

        second_column = df.columns[1]
        assert second_column == "STATEFP"
Exemplo n.º 6
0
 def test_wkb_constructor(self):
     """Load a WKB file into a PolygonRDD and verify count, bounds and data.

     Expects 103 records, a non-None boundary envelope, and a fixed
     tab-separated user-data string on the first record returned by take(1).
     """
     expected_user_data = "31\t039\t00835841\t31039\tCuming\tCuming County\t06\tH1\tG4020\t\t\t\tA\t1477895811\t10447360\t+41.9158651\t-096.7885168"

     wkb_rdd = PolygonRDD(
         sparkContext=self.sc,
         InputLocation=input_location_wkb,
         splitter=FileDataSplitter.WKB,
         carryInputData=True,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
     wkb_rdd.analyze()

     assert wkb_rdd.approximateTotalCount == 103
     assert wkb_rdd.boundaryEnvelope is not None

     first_record = wkb_rdd.rawSpatialRDD.take(1)[0]
     assert first_record.getUserData() == expected_user_data
Exemplo n.º 7
0
 def test_load_id_column_data_check(self):
     """Load a GeoJSON file that carries an id and check the DataFrame shape.

     The converted DataFrame must have either 3 or 4 columns (both counts
     are accepted) and exactly one row.
     """
     spatial_rdd = PolygonRDD(self.spark.sparkContext,
                              geojson_id_input_location,
                              FileDataSplitter.GEOJSON, True)
     spatial_rdd.analyze()
     df = Adapter.toDf(spatial_rdd, self.spark)
     df.show()

     # Either column count is valid; a single membership check replaces the
     # former assert-inside-try/except-AssertionError fallback.
     column_count = len(df.columns)
     assert column_count in (3, 4), \
         f"expected 3 or 4 columns but found {column_count}"
     assert df.count() == 1
Exemplo n.º 8
0
    def test_voronoi_spatial_partitioning(self):
        """Partition a CSV PolygonRDD with GridType.VORONOI and print its grids.

        The test makes no assertions; it passes when construction, analyze(),
        spatialPartitioning() and iterating over ``grids`` do not raise.
        """
        polygons = PolygonRDD(
            sparkContext=self.sc,
            InputLocation=input_location,
            splitter=FileDataSplitter.CSV,
            carryInputData=True,
            partitions=10,
            newLevel=StorageLevel.MEMORY_ONLY,
        )
        polygons.analyze()
        polygons.spatialPartitioning(GridType.VORONOI)

        for grid in polygons.grids:
            print(grid)
Exemplo n.º 9
0
    def test_hilbert_curve_spatial_partitioning(self):
        """Partition a PolygonRDD with GridType.HILBERT and print its grids.

        The test makes no assertions; it passes when construction, analyze(),
        spatialPartitioning() and iterating over ``grids`` do not raise.
        """
        polygons = PolygonRDD(
            sparkContext=self.sc,
            InputLocation=input_location,
            splitter=splitter,
            carryInputData=True,
            partitions=10,
            newLevel=StorageLevel.MEMORY_ONLY,
        )
        polygons.analyze()
        polygons.spatialPartitioning(GridType.HILBERT)

        for grid in polygons.grids:
            print(grid)
Exemplo n.º 10
0
 def test_empty_constructor(self):
     """Populate an argument-less PolygonRDD from an existing one.

     A fully prepared RDD (analyzed, partitioned, R-tree indexed) donates
     its ``rawJvmSpatialRDD`` to a PolygonRDD created with no arguments,
     which must then be able to run analyze() without raising.
     """
     source_rdd = PolygonRDD(
         sparkContext=self.sc,
         InputLocation=input_location,
         splitter=splitter,
         carryInputData=True,
         partitions=num_partitions,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
     source_rdd.analyze()
     source_rdd.spatialPartitioning(grid_type)
     source_rdd.buildIndex(IndexType.RTREE, True)

     empty_rdd = PolygonRDD()
     empty_rdd.rawJvmSpatialRDD = source_rdd.rawJvmSpatialRDD
     empty_rdd.analyze()
Exemplo n.º 11
0
 def test_geojson_constructor(self):
     """Load a GeoJSON file into a PolygonRDD and verify its contents.

     Checks the approximate record count (1001), that a boundary envelope
     exists, the user data of the first two records, and the field names.
     """
     expected_first = "01\t077\t011501\t5\t1500000US010770115015\t010770115015\t5\tBG\t6844991\t32636"
     expected_second = "01\t045\t021102\t4\t1500000US010450211024\t010450211024\t4\tBG\t11360854\t0"
     expected_fields = [
         "STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE", "AFFGEOID",
         "GEOID", "NAME", "LSAD", "ALAND", "AWATER",
     ]

     geojson_rdd = PolygonRDD(
         sparkContext=self.sc,
         InputLocation=input_location_geo_json,
         splitter=FileDataSplitter.GEOJSON,
         carryInputData=True,
         partitions=4,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
     geojson_rdd.analyze()

     assert geojson_rdd.approximateTotalCount == 1001
     assert geojson_rdd.boundaryEnvelope is not None

     first_record = geojson_rdd.rawSpatialRDD.take(1)[0]
     assert first_record.getUserData() == expected_first

     second_record = geojson_rdd.rawSpatialRDD.take(2)[1]
     assert second_record.getUserData() == expected_second

     assert geojson_rdd.fieldNames == expected_fields
Exemplo n.º 12
0
    def test_to_df_srdd_fn_spark(self):
        """Convert a PolygonRDD to a DataFrame with caller-supplied column names.

        The resulting DataFrame must have "geometry" followed by the supplied
        names as its columns, and 1001 rows.
        """
        geojson_rdd = PolygonRDD(
            self.spark.sparkContext,
            geojson_input_location,
            FileDataSplitter.GEOJSON,
            True,
        )
        geojson_rdd.analyze()
        assert geojson_rdd.approximateTotalCount == 1001

        spatial_columns = [
            "state_id",
            "county_id",
            "tract_id",
            "bg_id",
            "fips",
            "fips_short",
            "bg_nr",
            "type",
            "code1",
            "code2",
        ]
        renamed_df = Adapter.toDf(geojson_rdd, spatial_columns, self.spark)
        renamed_df.show()

        expected_columns = ["geometry"] + spatial_columns
        assert renamed_df.columns == expected_columns
        assert renamed_df.count() == 1001
Exemplo n.º 13
0
    def test_constructor(self):
        """Exercise PolygonRDD construction with many argument combinations.

        Instances are built from keyword arguments, positional arguments, an
        existing ``rawJvmSpatialRDD``, and with or without partition count,
        storage level and EPSG code pair. Each result is checked either with
        ``self.compare_spatial_rdd`` (against ``input_boundary`` or
        ``query_envelope``) or by asserting that ``analyze()`` is truthy and
        ``approximateTotalCount`` equals 3000.
        """
        # --- File-based construction: keyword form, then positional form ---
        spatial_rdd_core = PolygonRDD(
            sparkContext=self.sc,
            InputLocation=input_location,
            splitter=splitter,
            carryInputData=True,
            partitions=num_partitions,
            newLevel=StorageLevel.MEMORY_ONLY
        )
        self.compare_spatial_rdd(spatial_rdd_core, input_boundary)

        spatial_rdd_core = PolygonRDD(
            self.sc,
            input_location,
            splitter,
            True,
            num_partitions,
            StorageLevel.MEMORY_ONLY
        )

        self.compare_spatial_rdd(spatial_rdd_core, input_boundary)
        # --- Construction from an existing raw JVM RDD ---
        spatial_rdd = PolygonRDD(rawSpatialRDD=spatial_rdd_core.rawJvmSpatialRDD)
        self.compare_spatial_rdd(spatial_rdd, input_boundary)
        # With an EPSG code pair the result is compared against query_envelope
        # instead of input_boundary.
        spatial_rdd = PolygonRDD(spatial_rdd_core.rawJvmSpatialRDD, "epsg:4326", "epsg:5070")
        self.compare_spatial_rdd(spatial_rdd, query_envelope)
        assert spatial_rdd.getSourceEpsgCode() == "epsg:4326"
        assert spatial_rdd.getTargetEpsgCode() == "epsg:5070"
        spatial_rdd = PolygonRDD(rawSpatialRDD=spatial_rdd_core.rawJvmSpatialRDD, sourceEpsgCode="epsg:4326", targetEpsgCode="epsg:5070")
        assert spatial_rdd.getSourceEpsgCode() == "epsg:4326"
        assert spatial_rdd.getTargetEpsgCode() == "epsg:5070"
        self.compare_spatial_rdd(spatial_rdd, query_envelope)
        # This one is built from the previous spatial_rdd (created with the
        # EPSG pair), not from spatial_rdd_core, and is compared against
        # query_envelope.
        spatial_rdd = PolygonRDD(rawSpatialRDD=spatial_rdd.rawJvmSpatialRDD, newLevel=StorageLevel.MEMORY_ONLY)
        self.compare_spatial_rdd(spatial_rdd, query_envelope)
        spatial_rdd = PolygonRDD(spatial_rdd_core.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY)
        self.compare_spatial_rdd(spatial_rdd, input_boundary)
        # NOTE(review): this argument-less instance is never read before
        # spatial_rdd is reassigned below -- presumably it only checks that
        # the empty constructor does not raise; confirm.
        spatial_rdd = PolygonRDD()

        # --- File-based construction with start/end offsets ---
        query_window_rdd = PolygonRDD(
            self.sc,
            polygon_rdd_input_location,
            polygon_rdd_start_offset,
            polygon_rdd_end_offset,
            polygon_rdd_splitter,
            True,
            2
        )
        assert query_window_rdd.analyze()
        assert query_window_rdd.approximateTotalCount == 3000

        query_window_rdd = PolygonRDD(
            self.sc,
            polygon_rdd_input_location,
            polygon_rdd_start_offset,
            polygon_rdd_end_offset,
            polygon_rdd_splitter,
            True
        )
        assert query_window_rdd.analyze()
        assert query_window_rdd.approximateTotalCount == 3000

        # --- Without offsets: with and without a partition count ---
        spatial_rdd_core = PolygonRDD(
            self.sc,
            input_location,
            splitter,
            True,
            num_partitions
        )

        self.compare_spatial_rdd(spatial_rdd_core, input_boundary)

        spatial_rdd_core = PolygonRDD(
            self.sc,
            input_location,
            splitter,
            True
        )

        self.compare_spatial_rdd(spatial_rdd_core, input_boundary)

        # --- Adding a storage level, with and without a partition count ---
        query_window_rdd = PolygonRDD(
            self.sc,
            polygon_rdd_input_location,
            polygon_rdd_start_offset,
            polygon_rdd_end_offset,
            polygon_rdd_splitter,
            True,
            5,
            StorageLevel.MEMORY_ONLY
        )

        assert query_window_rdd.analyze()
        assert query_window_rdd.approximateTotalCount == 3000

        query_window_rdd = PolygonRDD(
            self.sc,
            polygon_rdd_input_location,
            polygon_rdd_start_offset,
            polygon_rdd_end_offset,
            polygon_rdd_splitter,
            True,
            StorageLevel.MEMORY_ONLY
        )

        assert query_window_rdd.analyze()
        assert query_window_rdd.approximateTotalCount == 3000

        spatial_rdd_core = PolygonRDD(
            self.sc,
            input_location,
            splitter,
            True,
            5,
            StorageLevel.MEMORY_ONLY
        )

        self.compare_spatial_rdd(spatial_rdd_core, input_boundary)

        spatial_rdd_core = PolygonRDD(
            self.sc,
            input_location,
            splitter,
            True,
            StorageLevel.MEMORY_ONLY
        )

        self.compare_spatial_rdd(spatial_rdd_core, input_boundary)

        # --- Adding an EPSG code pair on top of the storage level ---
        spatial_rdd = PolygonRDD(
            spatial_rdd_core.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070"
        )
        self.compare_spatial_rdd(spatial_rdd, query_envelope)

        query_window_rdd = PolygonRDD(
            self.sc,
            polygon_rdd_input_location,
            polygon_rdd_start_offset,
            polygon_rdd_end_offset,
            polygon_rdd_splitter,
            True,
            5,
            StorageLevel.MEMORY_ONLY,
            "epsg:4326",
            "epsg:5070"
        )

        assert query_window_rdd.analyze()
        assert query_window_rdd.approximateTotalCount == 3000

        query_window_rdd = PolygonRDD(
            self.sc,
            polygon_rdd_input_location,
            polygon_rdd_start_offset,
            polygon_rdd_end_offset,
            polygon_rdd_splitter,
            True,
            StorageLevel.MEMORY_ONLY,
            "epsg:4326",
            "epsg:5070"
        )

        assert query_window_rdd.analyze()
        assert query_window_rdd.approximateTotalCount == 3000

        spatial_rdd_core = PolygonRDD(
            self.sc,
            input_location,
            splitter,
            True,
            5,
            StorageLevel.MEMORY_ONLY,
            "epsg:4326",
            "epsg:5070"
        )

        self.compare_spatial_rdd(spatial_rdd_core, query_envelope)
        # NOTE(review): the instance built here is overwritten by the next
        # assignment without being compared, so only "construction does not
        # raise" is checked for this argument combination -- confirm whether
        # a compare_spatial_rdd call is missing.
        spatial_rdd_core = PolygonRDD(
            self.sc,
            input_location,
            splitter,
            True,
            StorageLevel.MEMORY_ONLY,
            "epsg:4326",
            "epsg:5070"
        )

        # NOTE(review): this call uses the keyword ``sourceEpsgCRSCode`` while
        # the rawSpatialRDD-based call earlier uses ``sourceEpsgCode`` --
        # verify the spelling against the constructor signature.
        spatial_rdd_core = PolygonRDD(
            sparkContext=self.sc,
            InputLocation=input_location,
            splitter=splitter,
            carryInputData=True,
            newLevel=StorageLevel.MEMORY_ONLY,
            sourceEpsgCRSCode="epsg:4326",
            targetEpsgCode="epsg:5070"
        )

        self.compare_spatial_rdd(spatial_rdd_core, query_envelope)