예제 #1
0
파일: geo.py 프로젝트: vertica/VerticaPy
def create_index(
    vdf: vDataFrame,
    gid: str,
    g: str,
    index: str,
    overwrite: bool = False,
    max_mem_mb: int = 256,
    skip_nonindexable_polygons: bool = False,
):
    """
---------------------------------------------------------------------------
Builds a spatial index over the polygons of a vDataFrame by running 
Vertica's STV_Create_Index on it. The index can then be used to 
intersect the polygons with a set of points.

Parameters
----------
vdf: vDataFrame
    vDataFrame holding the polygons to index.
gid: str
    Name of an integer column that uniquely identifies each polygon. 
    The gid cannot be NULL.
g: str
    Name of a geometry or geography (WGS84) column that contains 
    polygons and multipolygons. Only these two shape types can be 
    indexed; any other shape type is left out of the index.
index: str
    Name of the index.
overwrite: bool, optional
    Whether an already existing index of the same name is overwritten.
max_mem_mb: int, optional
    Positive integer limiting the amount of memory, in megabytes, that 
    the index construction may allocate.
skip_nonindexable_polygons: bool, optional
    In rare cases, intricate polygons (for instance, with too high 
    resolution or anomalous spikes) cannot be indexed. 
    If False, such non-indexable polygons make the index creation 
    fail. If True, they are excluded from the index so that the 
    creation can succeed.

Returns
-------
tablesample
    An object containing the result. For more information, see
    utilities.tablesample.
    """
    expected_types = [
        ("vdf", vdf, [vDataFrame]),
        ("gid", gid, [str]),
        ("index", index, [str]),
        ("g", g, [str]),
        ("overwrite", overwrite, [bool]),
        ("max_mem_mb", max_mem_mb, [int]),
        ("skip_nonindexable_polygons", skip_nonindexable_polygons, [bool]),
    ]
    check_types(expected_types)

    # Both columns must belong to vdf before their names are formatted.
    polygon_columns = [gid, g]
    vdf.are_namecols_in(polygon_columns)
    gid_col, geo_col = vdf.format_colnames(polygon_columns)

    relation = vdf.__genSQL__()

    # The booleans are rendered through their Python string form
    # ("True" / "False") inside the USING PARAMETERS clause.
    sql = f"""SELECT 
                    STV_Create_Index({gid_col}, {geo_col} 
                                     USING PARAMETERS 
                                        index='{index}', 
                                        overwrite={overwrite} , 
                                        max_mem_mb={max_mem_mb}, 
                                        skip_nonindexable_polygons={skip_nonindexable_polygons}) 
                                        OVER() 
                FROM {relation}"""

    return to_tablesample(sql)
예제 #2
0
파일: geo.py 프로젝트: vertica/VerticaPy
def intersect(vdf: vDataFrame,
              index: str,
              gid: str,
              g: str = "",
              x: str = "",
              y: str = ""):
    """
---------------------------------------------------------------------------
Spatially intersects a point or points with a set of polygons.

The points are taken either from a single geometry / geography column 
('g') or from a pair of coordinate columns ('x' and 'y'). When 'g' is 
provided it takes precedence and 'x' / 'y' are ignored.

Parameters
----------
vdf: vDataFrame
    vDataFrame to use to compute the spatial join.
index: str
    Name of the index.
gid: str
    Name of an integer column that uniquely identifies the spatial 
    object(s) of g or x and y. It must be a column of vdf.
g: str, optional
    A geometry or geography (WGS84) column that contains points. 
    The g column can contain only point geometries or geographies.
x: str, optional
    x-coordinate or longitude.
y: str, optional
    y-coordinate or latitude.

Returns
-------
vDataFrame
    object containing the result of the intersection, with the columns 
    point_id and polygon_gid.

Raises
------
ParameterError
    If 'g' is empty and 'x' and 'y' are not both provided.
    """
    check_types([
        ("vdf", vdf, [vDataFrame]),
        ("gid", gid, [str]),
        ("g", g, [str]),
        ("x", x, [str]),
        ("y", y, [str]),
        ("index", index, [str]),
    ])
    vdf.are_namecols_in([gid])
    # Resolve gid through format_colnames, exactly like g, x and y below,
    # instead of interpolating the caller's raw string into the SQL.
    gid = vdf.format_colnames(gid)

    table = vdf.__genSQL__()

    if g:
        vdf.are_namecols_in(g)
        point_args = vdf.format_colnames(g)
    elif x and y:
        vdf.are_namecols_in([x, y])
        x, y = vdf.format_colnames([x, y])
        point_args = f"{x}, {y}"
    else:
        raise ParameterError("Either 'x' and 'y' or 'g' must not be empty.")

    # Both input forms share the same statement; only the point arguments
    # passed to STV_Intersect after the gid differ.
    query = (f"(SELECT STV_Intersect({gid}, {point_args} USING PARAMETERS"
             f" index='{index}') OVER (PARTITION BEST) AS "
             f"(point_id, polygon_gid) FROM {table}) x")

    return vDataFrameSQL(query)