def compare_spatial_rdd(self, spatial_rdd: SpatialRDD, envelope: Envelope) -> bool:
    """
    Assert that a spatial RDD has the expected record count and boundary.

    :param spatial_rdd: SpatialRDD, analyzed here before its statistics are read
    :param envelope: Envelope, compared against ``spatial_rdd.boundaryEnvelope``
    :return: bool, True when both assertions hold
    :raises AssertionError: when the count or the boundary differs
    """
    # analyze() is invoked first; the attributes checked below are read afterwards.
    spatial_rdd.analyze()

    # input_count is neither a parameter nor a local; it is resolved from an
    # enclosing scope at call time.
    assert input_count == spatial_rdd.approximateTotalCount
    assert envelope == spatial_rdd.boundaryEnvelope

    return True
def toDf(cls, spatialRDD: SedonaRDD, spark: SparkSession, fieldNames: List = None) -> DataFrame:
    """
    Convert a SedonaRDD to a DataFrame by wrapping it in a SpatialRDD.

    :param spatialRDD: SedonaRDD, its ``sc`` and ``jsrdd`` attributes are used
    :param spark: SparkSession, forwarded to ``Adapter.toDf``
    :param fieldNames: List, optional; forwarded to ``Adapter.toDf`` only when truthy
    :return: DataFrame, the result of ``Adapter.toDf``
    """
    wrapper = SpatialRDD(spatialRDD.sc)
    wrapper.setRawSpatialRDD(spatialRDD.jsrdd)

    # None and an empty list both take the call without field names.
    if not fieldNames:
        return Adapter.toDf(wrapper, spark)
    return Adapter.toDf(wrapper, fieldNames, spark)
def readToGeometryRDD(cls, sc: SparkContext, inputPath: str) -> SpatialRDD:
    """
    Read a file through the JVM ``GeoJsonReader`` and wrap the result.

    :param sc: SparkContext, supplies the JVM gateway and the Java context
    :param inputPath: str, file input location
    :return: SpatialRDD, holding the RDD returned by the JVM reader
    """
    java_rdd = sc._jvm.GeoJsonReader.readToGeometryRDD(sc._jsc, inputPath)

    result = SpatialRDD(sc)
    result.set_srdd(java_rdd)
    return result
def partition_rdds(self, query_rdd: SpatialRDD, spatial_rdd: SpatialRDD, grid_type, use_legacy_apis):
    """
    Partition ``spatial_rdd`` by grid type, then partition ``query_rdd`` to match.

    :param query_rdd: SpatialRDD, partitioned using data taken from ``spatial_rdd``
    :param spatial_rdd: SpatialRDD, partitioned first with ``grid_type``
    :param grid_type: grid type passed to ``spatial_rdd.spatialPartitioning``
    :param use_legacy_apis: when truthy, pass ``grids`` or ``partitionTree``
        instead of the object returned by ``getPartitioner()``
    :return: None
    """
    spatial_rdd.spatialPartitioning(grid_type)

    if not use_legacy_apis:
        query_rdd.spatialPartitioning(spatial_rdd.getPartitioner())
        return

    # Legacy path: QUADTREE uses the partition tree, every other type the grids.
    if grid_type != GridType.QUADTREE:
        legacy_partitioning = spatial_rdd.grids
    else:
        legacy_partitioning = spatial_rdd.partitionTree
    query_rdd.spatialPartitioning(legacy_partitioning)
def readToGeometryRDD(cls, sc: SparkContext, inputPath: str) -> SpatialRDD:
    """
    Read an input path through the JVM ``ShapefileReader`` and wrap the result.

    :param sc: SparkContext, supplies the JVM gateway and the Java context
    :param inputPath: str, forwarded unchanged to the JVM reader
    :return: SpatialRDD, holding the RDD returned by the JVM reader
    """
    shapefile_reader = sc._jvm.ShapefileReader
    java_rdd = shapefile_reader.readToGeometryRDD(sc._jsc, inputPath)

    result = SpatialRDD(sc=sc)
    result.set_srdd(java_rdd)
    return result
def readToGeometryRDD(cls, rawTextRDD: RDD) -> SpatialRDD:
    """
    Pass an existing RDD to the JVM ``GeoJsonReader`` and wrap the result.

    :param rawTextRDD: RDD, its ``ctx`` and ``_jrdd`` attributes are used
    :return: SpatialRDD, built on the context of ``rawTextRDD``
    """
    context = rawTextRDD.ctx
    java_rdd = context._jvm.GeoJsonReader.readToGeometryRDD(rawTextRDD._jrdd)

    result = SpatialRDD(context)
    result.set_srdd(java_rdd)
    return result
def toSpatialRdd(cls, dataFrame: DataFrame):
    """
    Convert a DataFrame to a SpatialRDD through the JVM ``Adapter``.

    :param dataFrame: DataFrame, its ``_sc`` and ``_jdf`` attributes are used
    :return: SpatialRDD, holding the RDD returned by ``Adapter.toSpatialRdd``
    """
    context = dataFrame._sc
    java_rdd = context._jvm.Adapter.toSpatialRdd(dataFrame._jdf)

    result = SpatialRDD(context)
    result.set_srdd(java_rdd)
    return result
def readToGeometryRDD(cls, rawTextRDD: RDD, wktColumn: int, allowInvalidGeometries: bool,
                      skipSyntacticallyInvalidGeometries: bool) -> SpatialRDD:
    """
    Pass an existing RDD to the JVM ``WktReader`` and wrap the result.

    All arguments after ``rawTextRDD`` are forwarded unchanged to the JVM reader.

    :param rawTextRDD: RDD, its ``ctx`` and ``_jrdd`` attributes are used
    :param wktColumn: int
    :param allowInvalidGeometries: bool
    :param skipSyntacticallyInvalidGeometries: bool
    :return: SpatialRDD, built on the context of ``rawTextRDD``
    """
    context = rawTextRDD.ctx
    java_rdd = context._jvm.WktReader.readToGeometryRDD(
        rawTextRDD._jrdd,
        wktColumn,
        allowInvalidGeometries,
        skipSyntacticallyInvalidGeometries,
    )

    result = SpatialRDD(context)
    result.set_srdd(java_rdd)
    return result
def readToGeometryRDD(cls, sc: SparkContext, inputPath: str, wktColumn: int, allowInvalidGeometries: bool,
                      skipSyntacticallyInvalidGeometries: bool) -> SpatialRDD:
    """
    Read an input path through the JVM ``WktReader`` and wrap the result.

    All arguments after ``sc`` are forwarded unchanged to the JVM reader.

    :param sc: SparkContext, supplies the JVM gateway and the Java context
    :param inputPath: str
    :param wktColumn: int
    :param allowInvalidGeometries: bool
    :param skipSyntacticallyInvalidGeometries: bool
    :return: SpatialRDD, holding the RDD returned by the JVM reader
    """
    java_rdd = sc._jvm.WktReader.readToGeometryRDD(
        sc._jsc,
        inputPath,
        wktColumn,
        allowInvalidGeometries,
        skipSyntacticallyInvalidGeometries,
    )

    result = SpatialRDD(sc)
    result.set_srdd(java_rdd)
    return result
def toSpatialRdd(cls, dataFrame: DataFrame, geometryFieldName: str) -> SpatialRDD:
    """
    Convert a DataFrame to a SpatialRDD through the JVM ``Adapter``.

    :param dataFrame: DataFrame, its ``_sc`` and ``_jdf`` attributes are used
    :param geometryFieldName: str, forwarded unchanged to ``Adapter.toSpatialRdd``
    :return: SpatialRDD, holding the RDD returned by ``Adapter.toSpatialRdd``
    """
    context = dataFrame._sc
    java_rdd = context._jvm.Adapter.toSpatialRdd(dataFrame._jdf, geometryFieldName)

    result = SpatialRDD(context)
    result.set_srdd(java_rdd)
    return result
def toSpatialRdd(cls, dataFrame: DataFrame, fieldNames: List) -> SpatialRDD:
    """
    Convert a DataFrame to a SpatialRDD through the JVM ``PythonAdapterWrapper``.

    :param dataFrame: DataFrame, its ``_sc`` and ``_jdf`` attributes are used
    :param fieldNames: List, forwarded unchanged to
        ``PythonAdapterWrapper.toSpatialRdd``
    :return: SpatialRDD, holding the RDD returned by the JVM wrapper
    """
    context = dataFrame._sc
    adapter_wrapper = context._jvm.PythonAdapterWrapper
    java_rdd = adapter_wrapper.toSpatialRdd(dataFrame._jdf, fieldNames)

    result = SpatialRDD(context)
    result.set_srdd(java_rdd)
    return result
def readToGeometryRDD(
        cls, sc: SparkContext, inputPath: str, allowInvalidGeometries: bool,
        skipSyntacticallyInvalidGeometries: bool) -> SpatialRDD:
    """
    Read a file through the JVM ``GeoJsonReader`` with validity flags.

    Both flags are forwarded unchanged to the JVM reader.

    :param sc: SparkContext, supplies the JVM gateway and the Java context
    :param inputPath: str, path to the file
    :param allowInvalidGeometries: bool
    :param skipSyntacticallyInvalidGeometries: bool
    :return: SpatialRDD, holding the RDD returned by the JVM reader
    """
    java_rdd = sc._jvm.GeoJsonReader.readToGeometryRDD(
        sc._jsc,
        inputPath,
        allowInvalidGeometries,
        skipSyntacticallyInvalidGeometries,
    )

    result = SpatialRDD(sc)
    result.set_srdd(java_rdd)
    return result
def compare_count(self, spatial_rdd: SpatialRDD, cnt: int, envelope: Envelope):
    """
    Assert that a spatial RDD has the given record count and boundary.

    :param spatial_rdd: SpatialRDD, analyzed here before its statistics are read
    :param cnt: int, compared against ``spatial_rdd.approximateTotalCount``
    :param envelope: Envelope, compared against ``spatial_rdd.boundaryEnvelope``
    :return: None
    :raises AssertionError: when the count or the boundary differs
    """
    # analyze() is invoked first; the attributes checked below are read afterwards.
    spatial_rdd.analyze()

    assert cnt == spatial_rdd.approximateTotalCount
    assert envelope == spatial_rdd.boundaryEnvelope
def prepare_rdd(self, object_rdd: SpatialRDD, window_rdd: SpatialRDD, grid_type: GridType):
    """
    Analyze both RDDs, partition and index ``object_rdd``, then partition
    ``window_rdd`` with the partitioner of ``object_rdd``.

    :param object_rdd: SpatialRDD, partitioned by ``grid_type`` and indexed
    :param window_rdd: SpatialRDD, partitioned with ``object_rdd.getPartitioner()``
    :param grid_type: GridType, passed to ``object_rdd.spatialPartitioning``
    :return: None
    """
    for rdd in (object_rdd, window_rdd):
        rdd.analyze()

    # NOTE(review): the return value of repartition() is discarded. If it
    # returns a new RDD rather than mutating in place, this call has no
    # lasting effect - confirm the intent.
    object_rdd.rawSpatialRDD.repartition(4)

    object_rdd.spatialPartitioning(grid_type)
    object_rdd.buildIndex(IndexType.RTREE, True)

    shared_partitioner = object_rdd.getPartitioner()
    window_rdd.spatialPartitioning(shared_partitioner)
def partition_rdds(self, query_rdd: SpatialRDD, spatial_rdd: SpatialRDD, grid_type):
    """
    Partition ``spatial_rdd`` by grid type, then partition ``query_rdd`` with
    the resulting partitioner.

    :param query_rdd: SpatialRDD, partitioned with ``spatial_rdd.getPartitioner()``
    :param spatial_rdd: SpatialRDD, partitioned first with ``grid_type``
    :param grid_type: grid type passed to ``spatial_rdd.spatialPartitioning``
    :return: the value returned by ``query_rdd.spatialPartitioning``
    """
    spatial_rdd.spatialPartitioning(grid_type)
    partitioner = spatial_rdd.getPartitioner()
    return query_rdd.spatialPartitioning(partitioner)