Exemplo n.º 1
0
 def setRadius(self, givenRadius: float):
     """
     Update the radius and recompute the bounding envelope stored in ``MBR``.

     The radius is never set below half the diagonal of the envelope of
     ``centerGeometry``.

     :param givenRadius: requested radius; used only when it is strictly
         greater than that half-diagonal
     """
     bounds = Envelope.from_shapely_geom(self.centerGeometry)
     span_x = bounds.maxx - bounds.minx
     span_y = bounds.maxy - bounds.miny
     half_diagonal = sqrt(span_x**2 + span_y**2) / 2
     # half_diagonal goes first so that it wins ties, like the `>` comparison.
     self.radius = max(half_diagonal, givenRadius)
     center_x = self.centerPoint.x
     center_y = self.centerPoint.y
     reach = self.radius
     self.MBR = Envelope(center_x - reach,
                         center_x + reach,
                         center_y - reach,
                         center_y + reach)
Exemplo n.º 2
0
class TestPolygonRange(TestBase):
    """Range-query tests over a PolygonRDD, with and without a built index."""

    loop_times = 5
    query_envelope = Envelope(-85.01, -60.01, 34.01, 50.01)

    def test_spatial_range_query(self):
        """Repeated range queries count 704 records and results carry user data."""
        polygons = PolygonRDD(
            self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY
        )
        for _ in range(self.loop_times):
            matches = RangeQuery.SpatialRangeQuery(
                polygons, self.query_envelope, False, False)
            result_size = matches.count()
            assert result_size == 704

        sample = RangeQuery.SpatialRangeQuery(
            polygons, self.query_envelope, False, False).take(10)
        assert sample[0].getUserData() is not None

    def test_spatial_range_query_using_index(self):
        """Same checks as above after an R-tree index has been built on the RDD."""
        polygons = PolygonRDD(
            self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY
        )
        polygons.buildIndex(IndexType.RTREE, False)
        for _ in range(self.loop_times):
            matches = RangeQuery.SpatialRangeQuery(
                polygons, self.query_envelope, False, False)
            result_size = matches.count()
            assert result_size == 704

        sample = RangeQuery.SpatialRangeQuery(
            polygons, self.query_envelope, False, False).take(10)
        assert sample[0].getUserData() is not None
Exemplo n.º 3
0
class TestPointRange(TestBase):
    """Range-query tests over a PointRDD, with and without a built index."""

    loop_times = 5
    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)

    def test_spatial_range_query(self):
        """Repeated range queries count 2830 records and results carry user data."""
        points = PointRDD(self.sc, input_location, offset, splitter, False)
        for _ in range(self.loop_times):
            matches = RangeQuery.SpatialRangeQuery(
                points, self.query_envelope, False, False)
            result_size = matches.count()
            assert result_size == 2830

        sample = RangeQuery.SpatialRangeQuery(
            points, self.query_envelope, False, False).take(10)
        assert sample[1].getUserData() is not None

    def test_spatial_range_query_using_index(self):
        """Same checks as above after an R-tree index has been built on the RDD."""
        points = PointRDD(self.sc, input_location, offset, splitter, False)
        points.buildIndex(IndexType.RTREE, False)

        for _ in range(self.loop_times):
            matches = RangeQuery.SpatialRangeQuery(
                points, self.query_envelope, False, False)
            result_size = matches.count()
            assert result_size == 2830

        sample = RangeQuery.SpatialRangeQuery(
            points, self.query_envelope, False, False).take(10)
        assert sample[1].getUserData() is not None
Exemplo n.º 4
0
class TestRectangleRange(TestBase):
    """Range-query tests over a RectangleRDD, with and without a built index."""

    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)
    loop_times = 5

    def test_spatial_range_query(self):
        """Repeated range queries count 193 records and results carry user data."""
        rectangles = RectangleRDD(
            self.sc, inputLocation, offset, splitter, True,
            StorageLevel.MEMORY_ONLY)

        for _ in range(self.loop_times):
            matches = RangeQuery.SpatialRangeQuery(
                rectangles, self.query_envelope, False, False)
            result_size = matches.count()
            assert result_size == 193

        sample = RangeQuery.SpatialRangeQuery(
            rectangles, self.query_envelope, False, False).take(10)
        assert sample[1].getUserData() is not None

    def test_spatial_range_query_using_index(self):
        """Same checks after building an R-tree index, querying with the last flag set."""
        rectangles = RectangleRDD(
            self.sc, inputLocation, offset, splitter, True,
            StorageLevel.MEMORY_ONLY)
        rectangles.buildIndex(IndexType.RTREE, False)

        for _ in range(self.loop_times):
            matches = RangeQuery.SpatialRangeQuery(
                rectangles, self.query_envelope, False, True)
            result_size = matches.count()
            assert result_size == 193

        sample = RangeQuery.SpatialRangeQuery(
            rectangles, self.query_envelope, False, True).take(10)
        assert sample[1].getUserData() is not None
0
    def boundaryEnvelope(self) -> Envelope:
        """
        Return the boundary envelope held by the wrapped ``_srdd`` object.

        :return: Envelope built from the "boundaryEnvelope" field of ``_srdd``
        :raises TypeError: when ``_is_analyzed`` is falsy
        """
        if self._is_analyzed:
            boundary_field = get_field(self._srdd, "boundaryEnvelope")
            return Envelope.from_jvm_instance(boundary_field)
        raise TypeError("Please use analyze before")
Exemplo n.º 6
0
    def boundary(self) -> Envelope:
        """
        Return the boundary reported by the wrapped ``_srdd`` object.

        :return: Envelope converted from the result of ``_srdd.boundary()``
        """
        return Envelope.from_jvm_instance(self._srdd.boundary())
Exemplo n.º 7
0
    def test_to_spatial_rdd_df(self):
        """Adapter.toSpatialRdd on the point table gives the expected count and boundary."""
        point_table = self._create_spatial_point_table()
        converted_rdd = Adapter.toSpatialRdd(point_table)
        converted_rdd.analyze()

        assert converted_rdd.approximateTotalCount == 121960

        expected_boundary = Envelope(
            -179.147236, 179.475569, -14.548699, 71.35513400000001)
        assert converted_rdd.boundaryEnvelope == expected_boundary
Exemplo n.º 8
0
    def __init__(self, centerGeometry: BaseGeometry, givenRadius: float):
        """
        Build a circle centred on the middle of ``centerGeometry``'s envelope.

        The effective radius is never smaller than half the diagonal of that
        envelope. The parent class is initialised with the centre point
        buffered by the effective radius.

        :param centerGeometry: geometry whose envelope determines the centre
        :param givenRadius: requested radius; used only when it is strictly
            greater than half the envelope diagonal
        """
        self.centerGeometry = centerGeometry
        center_geometry_mbr = Envelope.from_shapely_geom(self.centerGeometry)
        self.centerPoint = Point(
            (center_geometry_mbr.minx + center_geometry_mbr.maxx) / 2.0,
            (center_geometry_mbr.miny + center_geometry_mbr.maxy) / 2.0)

        width = center_geometry_mbr.maxx - center_geometry_mbr.minx
        length = center_geometry_mbr.maxy - center_geometry_mbr.miny

        center_geometry_internal_radius = sqrt(width**2 + length**2) / 2.0
        # The internal radius goes first so it is kept unless givenRadius is
        # strictly greater.
        self.radius = max(center_geometry_internal_radius, givenRadius)
        self.MBR = Envelope(self.centerPoint.x - self.radius,
                            self.centerPoint.x + self.radius,
                            self.centerPoint.y - self.radius,
                            self.centerPoint.y + self.radius)
        super().__init__(self.centerPoint.buffer(self.radius))
Exemplo n.º 9
0
 def test_read_to_point_rdd(self):
     """Reading the point shapefile yields the expected JVM RDD types and a full-range match count."""
     shapefile_dir = os.path.join(tests_path, "resources/shapefiles/point")
     point_rdd = ShapefileReader.readToPointRDD(self.sc, shapefile_dir)
     generic_rdd = ShapefileReader.readToGeometryRDD(self.sc, shapefile_dir)

     full_window = Envelope(-180.0, 180.0, -90.0, 90.0)
     matched = RangeQuery.SpatialRangeQuery(
         point_rdd, full_window, False, False).count()
     assert point_rdd.rawSpatialRDD.count() == matched

     generic_description = generic_rdd._srdd.toString()
     assert 'org.datasyslab.geospark.spatialRDD.SpatialRDD' in generic_description

     point_description = point_rdd._srdd.toString()
     assert 'org.datasyslab.geospark.spatialRDD.PointRDD' in point_description
Exemplo n.º 10
0
    def test_to_rdd_from_dataframe(self):
        """Adapter.toRdd output assigned to a fresh SpatialRDD analyzes to the expected count and boundary."""
        point_table = self._create_spatial_point_table()
        point_table.show()

        converted = Adapter.toRdd(point_table)
        wrapper_rdd = SpatialRDD(self.sc)
        wrapper_rdd.rawJvmSpatialRDD = converted
        wrapper_rdd.analyze()

        assert wrapper_rdd.approximateTotalCount == 121960

        expected_boundary = Envelope(
            -179.147236, 179.475569, -14.548699, 71.35513400000001)
        assert wrapper_rdd.boundaryEnvelope == expected_boundary
Exemplo n.º 11
0
    def grids(self) -> Optional[List[Envelope]]:
        """
        Return the grids of this SpatialRDD as a list of Envelopes.

        >> spatial_rdd.grids
        >> [Envelope(minx=10.0, maxx=12.0, miny=10.0, maxy=12.0)]
        :return: one Envelope per entry of ``jvm_grids.jgrid``, or None when
            that object is falsy
        """
        java_grid_list = self.jvm_grids.jgrid
        if not java_grid_list:
            return None

        return [
            Envelope.from_jvm_instance(java_grid_list[position])
            for position in range(java_grid_list.size())
        ]
Exemplo n.º 12
0
import os

from geospark.core.enums import FileDataSplitter
from geospark.core.geom.envelope import Envelope
from tests.tools import tests_path

# Shared settings and expected values for tests that import this module.

# Input CSV, resolved under tests_path.
input_location = os.path.join(tests_path, "resources/arealm-small.csv")
# NOTE(review): os.path.join with a single argument returns it unchanged, so
# this is a bare file name rather than a path under tests_path — confirm.
query_window_set = os.path.join("zcta510-small.csv")
# Presumably the constructor arguments used when building the RDD under test;
# verify against the importing test modules.
offset = 1
splitter = FileDataSplitter.CSV
grid_type = "rtree"
index_type = "rtree"
num_partitions = 11
distance = 0.01
# NOTE(review): bare file name, not joined with tests_path — confirm.
query_polygon_set = "primaryroads-polygon.csv"
input_count = 3000
# Presumably the boundary expected for the input data set — verify against callers.
input_boundary = Envelope(minx=-173.120769,
                          maxx=-84.965961,
                          miny=30.244859,
                          maxy=71.355134)
# Expected match counts; which queries produce them is not visible here.
rectangle_match_count = 103
rectangle_with_original_duplicates_count = 103
polygon_match_count = 472
polygon_match_with_original_duplicates_count = 562

# Envelopes given positionally; elsewhere in the tests the positional order is
# (minx, maxx, miny, maxy).
transformed_envelope = Envelope(14313844.29433424, 16587207.463797055,
                                942450.5989896542, 6697987.652517834)
crs_point_test = os.path.join(tests_path, "resources/crs-test-point.csv")
# NOTE(review): the first pair spans 26.99..71.36 and the second -179.15..179.48,
# which looks like a swapped axis order compared with input_boundary — confirm.
crs_envelope = Envelope(26.992172, 71.35513400000001, -179.147236, 179.475569)
crs_envelope_transformed = Envelope(-5446655.086752228, 1983668.382852457,
                                    534241.8622328962, 6143259.02554563)
Exemplo n.º 13
0
    def test_boundary(self):
        """boundary() of the RDD from create_spatial_rdd() equals the expected envelope."""
        actual = self.create_spatial_rdd().boundary()
        expected = Envelope(
            minx=-173.120769, maxx=-84.965961, miny=30.244859, maxy=71.355134)

        assert actual == expected
Exemplo n.º 14
0
 def test_boundary_envelope(self):
     """After analyze(), boundaryEnvelope equals the expected envelope."""
     analyzed_rdd = self.create_spatial_rdd()
     analyzed_rdd.analyze()

     expected = Envelope(
         minx=-173.120769, maxx=-84.965961, miny=30.244859, maxy=71.355134)
     assert expected == analyzed_rdd.boundaryEnvelope
Exemplo n.º 15
0
 def _compute_envelope_internal(self):
     if self.is_empty:
         return Envelope()
     return self.MBR
Exemplo n.º 16
0
 def test_jvm_envelope(self):
     """The JVM instance created from a 5 x 5 envelope reports an area of 25."""
     square = Envelope(0.0, 5.0, 0.0, 5.0)
     jvm_square = square.create_jvm_instance(self.spark.sparkContext._jvm)
     envelope_area = jvm_square.getArea()

     assert envelope_area == 25.0, f"Expected area to be equal 25 but {envelope_area} was found"
from geospark.core.geom.envelope import Envelope
from tests.tools import tests_path

# Shared settings and expected values for tests that import this module.

# Input CSV, resolved under tests_path.
input_location = os.path.join(tests_path,
                              "resources/primaryroads-linestring.csv")
query_window_set = os.path.join(tests_path, "resources/zcta510-small.csv")
# Presumably the constructor arguments used when building the RDD under test;
# verify against the importing test modules.
offset = 0
splitter = FileDataSplitter.CSV
grid_type = "rtree"
index_type = "rtree"
num_partitions = 5
distance = 0.01
query_polygon_set = os.path.join(tests_path,
                                 "resources/primaryroads-polygon.csv")
input_count = 3000
# Two expected boundaries that differ only in maxx; which scenario each one
# belongs to is not visible here.
input_boundary = Envelope(minx=-123.393766,
                          maxx=-65.648659,
                          miny=17.982169,
                          maxy=49.002374)
input_boundary_2 = Envelope(minx=-123.393766,
                            maxx=-65.649956,
                            miny=17.982169,
                            maxy=49.002374)
match_count = 535
match_with_origin_with_duplicates_count = 875

# Two transformed envelopes that differ only in their last positional value.
transformed_envelope = Envelope(14313844.294334238, 16791709.853587367,
                                942450.5989896103, 8474779.278028358)
transformed_envelope_2 = Envelope(14313844.294334238, 16791709.853587367,
                                  942450.5989896103, 8474645.488977432)
Exemplo n.º 18
0
from geospark.core.spatialOperator import RangeQuery
from tests.test_base import TestBase
from tests.tools import tests_path

# Module-level settings for the tests defined below.

# Input CSV, resolved under tests_path.
input_location = os.path.join(tests_path, "resources/arealm-small.csv")
# NOTE(review): os.path.join with a single argument returns it unchanged, so
# this is a bare file name rather than a path under tests_path — confirm.
queryWindowSet = os.path.join("zcta510-small.csv")
# offset and splitter are passed to the PointRDD constructor in the test class.
offset = 1
splitter = FileDataSplitter.CSV
gridType = "rtree"
indexType = "rtree"
numPartitions = 11
distance = 0.01
# NOTE(review): bare file name, not joined with tests_path — confirm.
queryPolygonSet = "primaryroads-polygon.csv"
inputCount = 3000
# Presumably the boundary expected for the input data set — verify against callers.
inputBoundary = Envelope(minx=-173.120769,
                         maxx=-84.965961,
                         miny=30.244859,
                         maxy=71.355134)
# Expected match counts; which queries produce them is not visible here.
rectangleMatchCount = 103
rectangleMatchWithOriginalDuplicatesCount = 103
polygonMatchCount = 472
polygonMatchWithOriginalDuplicatesCount = 562


class TestPointRange(TestBase):
    loop_times = 5
    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)

    def test_spatial_range_query(self):
        spatial_rdd = PointRDD(self.sc, input_location, offset, splitter,
                               False)
        for i in range(self.loop_times):
Exemplo n.º 19
0
class TestRectangleKNN(TestBase):
    """K-nearest-neighbour query tests run against a RectangleRDD."""

    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)
    loop_times = 5
    query_point = Point(-84.01, 34.01)
    top_k = 100
    query_polygon = Polygon([(-84.01, 34.01), (-84.01, 34.11), (-83.91, 34.11),
                             (-83.91, 34.01), (-84.01, 34.01)])
    query_line = LineString([(-84.01, 34.01), (-84.01, 34.11), (-83.91, 34.11),
                             (-83.91, 34.01)])

    def test_spatial_knn_query(self):
        """A KNN query around query_point returns neighbours that carry user data."""
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter,
                                     True)

        for _ in range(self.loop_times):
            result = KNNQuery.SpatialKnnQuery(rectangle_rdd, self.query_point,
                                              self.top_k, False)

            # Require at least one neighbour before indexing into the result.
            assert len(result) > 0
            assert result[0].getUserData() is not None

    def test_spatial_knn_query_using_index(self):
        """Same checks as above after an R-tree index has been built on the RDD."""
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter,
                                     True)
        rectangle_rdd.buildIndex(IndexType.RTREE, False)

        for _ in range(self.loop_times):
            # NOTE(review): the last argument stays False here although an index
            # was built; the correctness test passes True — confirm the intent.
            result = KNNQuery.SpatialKnnQuery(rectangle_rdd, self.query_point,
                                              self.top_k, False)

            # Require at least one neighbour before indexing into the result.
            assert len(result) > 0
            assert result[0].getUserData() is not None

    def test_spatial_knn_query_correctness(self):
        """Results queried before and after building an index match pairwise by distance."""
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter,
                                     True)

        result_no_index = KNNQuery.SpatialKnnQuery(rectangle_rdd,
                                                   self.query_point,
                                                   self.top_k, False)
        rectangle_rdd.buildIndex(IndexType.RTREE, False)

        result_with_index = KNNQuery.SpatialKnnQuery(rectangle_rdd,
                                                     self.query_point,
                                                     self.top_k, True)

        def by_distance(geo_data):
            return distance_sorting_functions(geo_data, self.query_point)

        sorted_result_no_index = sorted(result_no_index, key=by_distance)
        sorted_result_with_index = sorted(result_with_index, key=by_distance)

        # Sum of distances between same-rank geometries; zero means both
        # result lists contain coinciding geometries at every rank.
        difference = sum(
            sorted_result_no_index[rank].geom.distance(
                sorted_result_with_index[rank].geom)
            for rank in range(self.top_k))

        assert difference == 0

    def test_spatial_knn_using_polygon(self):
        """Run a KNN query with a polygon as the query geometry and print the result."""
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter,
                                     True)

        result_no_index = KNNQuery.SpatialKnnQuery(rectangle_rdd,
                                                   self.query_polygon,
                                                   self.top_k, False)

        print(result_no_index)

    def test_spatial_knn_using_linestring(self):
        """Run a KNN query with a line string as the query geometry and print the result."""
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter,
                                     True)

        result_no_index = KNNQuery.SpatialKnnQuery(rectangle_rdd,
                                                   self.query_line, self.top_k,
                                                   False)

        print(result_no_index)
Exemplo n.º 20
0
from tests.tools import tests_path

# Module-level settings for the spatial RDD tests below.
resource_folder = "resources"

# Input CSV for the PointRDD, resolved under tests_path/resources.
point_rdd_input_location = os.path.join(tests_path, resource_folder,
                                        "arealm-small.csv")

point_rdd_splitter = FileDataSplitter.CSV

point_rdd_index_type = IndexType.RTREE
point_rdd_num_partitions = 5
point_rdd_offset = 1

knn_query_point = Point(-84.01, 34.01)

range_query_window = Envelope(-90.01, -80.01, 30.01, 40.01)

# NOTE(review): the name is misspelled ("partitionin"); renaming it would
# require updating every user of this constant.
join_query_partitionin_type = GridType.QUADTREE
each_query_loop_times = 1


class TestSpatialRDD(TestBase):
    """Tests for constructing and analyzing spatial RDDs."""

    def test_empty_constructor_test(self):
        """Analyze a PointRDD created without arguments after giving it another RDD's raw JVM RDD."""
        object_rdd = PointRDD(sparkContext=self.sc,
                              InputLocation=point_rdd_input_location,
                              Offset=point_rdd_offset,
                              splitter=point_rdd_splitter,
                              carryInputData=False)
        # Start from an argument-less PointRDD and reuse the raw JVM RDD of
        # the one built above.
        object_rdd_copy = PointRDD()
        object_rdd_copy.rawJvmSpatialRDD = object_rdd.rawJvmSpatialRDD
        object_rdd_copy.analyze()
Exemplo n.º 21
0
from geospark.core.spatialOperator import KNNQuery
from tests.test_base import TestBase
from tests.tools import tests_path, distance_sorting_functions

# Module-level settings for the rectangle KNN tests below.

# Input CSV, resolved under tests_path; queryWindowSet points at the same file.
inputLocation = os.path.join(tests_path, "resources/zcta510-small.csv")
queryWindowSet = os.path.join(tests_path, "resources/zcta510-small.csv")
# offset and splitter are passed to the RectangleRDD constructor in the test class.
offset = 0
splitter = FileDataSplitter.CSV
gridType = "rtree"
indexType = "rtree"
numPartitions = 11
distance = 0.001
queryPolygonSet = os.path.join(tests_path,
                               "resources/primaryroads-polygon.csv")
inputCount = 3000
# Envelope given positionally; elsewhere in the tests the positional order is
# (minx, maxx, miny, maxy).
inputBoundary = Envelope(-171.090042, 145.830505, -14.373765, 49.00127)
matchCount = 17599
matchWithOriginalDuplicatesCount = 17738


class TestRectangleKNN(TestBase):
    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)
    loop_times = 5
    query_point = Point(-84.01, 34.01)
    top_k = 100
    query_polygon = Polygon([(-84.01, 34.01), (-84.01, 34.11), (-83.91, 34.11),
                             (-83.91, 34.01), (-84.01, 34.01)])
    query_line = LineString([(-84.01, 34.01), (-84.01, 34.11), (-83.91, 34.11),
                             (-83.91, 34.01)])

    def test_spatial_knn_query(self):
Exemplo n.º 22
0
from tests.test_base import TestBase
from tests.tools import tests_path

# Module-level settings for the RectangleRDD tests below.

# Input CSV, resolved under tests_path; queryWindowSet points at the same file.
inputLocation = os.path.join(tests_path, "resources/zcta510-small.csv")
queryWindowSet = os.path.join(tests_path, "resources/zcta510-small.csv")
# offset, splitter and numPartitions are passed to the RectangleRDD
# constructor in the test class.
offset = 0
splitter = FileDataSplitter.CSV
gridType = "rtree"
indexType = "rtree"
numPartitions = 11
distance = 0.001
queryPolygonSet = os.path.join(tests_path,
                               "resources/primaryroads-polygon.csv")
inputCount = 3000
# Presumably the boundary expected for the input data set — verify against callers.
inputBoundary = Envelope(minx=-171.090042,
                         maxx=145.830505,
                         miny=-14.373765,
                         maxy=49.00127)
matchCount = 17599
matchWithOriginalDuplicatesCount = 17738


class TestRectangleRDD(TestBase):
    """Tests for constructing a RectangleRDD."""

    def test_constructor(self):
        """Build a RectangleRDD from the module-level settings using keyword arguments."""
        spatial_rdd = RectangleRDD(sparkContext=self.sc,
                                   InputLocation=inputLocation,
                                   Offset=offset,
                                   splitter=splitter,
                                   carryInputData=True,
                                   partitions=numPartitions,
                                   newLevel=StorageLevel.MEMORY_ONLY)
Exemplo n.º 23
0
import os

from shapely.geometry import Point

from geospark.core.enums import FileDataSplitter, GridType, IndexType
from geospark.core.geom.envelope import Envelope
from tests.tools import tests_path

# Shared settings for tests that import this module.

# Input CSV, resolved under tests_path.
input_location = os.path.join(tests_path, "resources/crs-test-point.csv")
# Presumably the constructor arguments used when building the RDD under test;
# verify against the importing test modules.
offset = 0
splitter = FileDataSplitter.CSV
grid_type = GridType.RTREE
index_type = IndexType.RTREE
num_partitions = 11
distance = 0.01
input_location_query_polygon = os.path.join(tests_path,
                                            "resources/crs-test-polygon.csv")
loop_times = 5
# NOTE(review): compared with the range-query tests, which use
# Envelope(-90.01, -80.01, 30.01, 40.01) and Point(-84.01, 34.01), the two
# axes look swapped here — presumably on purpose for the CRS tests; confirm.
query_envelope = Envelope(30.01, 40.01, -90.01, -80.01)
query_point = Point(34.01, -84.01)
top_k = 100
Exemplo n.º 24
0
from geospark.core.enums import FileDataSplitter, IndexType
from geospark.core.geom.envelope import Envelope
from tests.tools import tests_path

# Shared settings and expected values for tests that import this module.

# Input CSV, resolved under tests_path.
input_location = os.path.join(tests_path, "resources/primaryroads-polygon.csv")
query_window_set = os.path.join(tests_path, "resources/zcta510-small.csv")
# Presumably the constructor arguments used when building the RDD under test;
# verify against the importing test modules.
offset = 0
splitter = FileDataSplitter.CSV
grid_type = "rtree"
index_type = "rtree"
num_partitions = 5
distance = 0.01
input_location_query_polygon = os.path.join(tests_path,
                                            "resources/crs-test-polygon.csv")
query_polygon_count = 13361
# Envelope given positionally; elsewhere in the tests the positional order is
# (minx, maxx, miny, maxy).
query_envelope = Envelope(14313844.29433424, 16802290.85383074,
                          942450.5989896542, 8631908.270651951)
# Resolves to the same file as input_location.
query_polygon_set = os.path.join(tests_path,
                                 "resources/primaryroads-polygon.csv")
# The same kind of input in other formats, judging by the file names.
input_location_geo_json = os.path.join(tests_path,
                                       "resources/testPolygon.json")
input_location_wkt = os.path.join(tests_path, "resources/county_small.tsv")
input_location_wkb = os.path.join(tests_path, "resources/county_small_wkb.tsv")
input_count = 3000
# Presumably the boundary expected for the input data set — verify against callers.
input_boundary = Envelope(minx=-158.104182,
                          maxx=-66.03575,
                          miny=17.986328,
                          maxy=48.645133)
# Expected match counts; which queries produce them is not visible here.
contains_match_count = 6941
contains_match_with_original_duplicates_count = 9334
intersects_match_count = 24323
intersects_match_with_original_duplicates_count = 32726
Exemplo n.º 25
0
 def test_get_envelope_internal(self):
     """A circle of radius 0.1 around the origin reports the matching square envelope."""
     small_circle = Circle(Point(0.0, 0.0), 0.1)
     expected = Envelope(-0.1, 0.1, -0.1, 0.1)

     assert expected == small_circle.getEnvelopeInternal()