예제 #1
0
파일: geo.py 프로젝트: vertica/VerticaPy
def coordinate_converter(
    vdf: vDataFrame,
    x: str,
    y: str,
    x0: float = 0.0,
    earth_radius: float = 6371,
    reverse: bool = False,
):
    """
---------------------------------------------------------------------------
Converts between geographic coordinates (latitude and longitude) and 
Euclidean coordinates (x,y).

Parameters
----------
vdf: vDataFrame
    input vDataFrame.
x: str
    vColumn used as the abscissa (longitude).
y: str
    vColumn used as the ordinate (latitude).
x0: float, optional
    The initial abscissa.
earth_radius: float, optional
    Earth radius in km.
reverse: bool, optional
    If set to True, the Euclidean coordinates are converted to latitude 
    and longitude.

Returns
-------
vDataFrame
    result of the transformation.
    """
    check_types([
        ("vdf", vdf, [vDataFrame]),
        ("x", x, [str]),
        ("y", y, [str]),
        ("x0", x0, [int, float]),
        ("earth_radius", earth_radius, [int, float]),
        ("reverse", reverse, [bool]),
    ])
    vdf.are_namecols_in([x, y])

    result = vdf.copy()

    if reverse:

        result[x] = result[x] / earth_radius * 180 / st.pi + x0
        result[y] = ((st.atan(st.exp(result[y] / earth_radius)) - st.pi / 4) /
                     st.pi * 360)

    else:

        result[x] = earth_radius * ((result[x] - x0) * st.pi / 180)
        result[y] = earth_radius * st.ln(
            st.tan(result[y] * st.pi / 360 + st.pi / 4))

    return result
예제 #2
0
def het_breuschpagan(
    vdf: vDataFrame, eps: str, X: list,
):
    """
---------------------------------------------------------------------------
Breusch-Pagan test for heteroscedasticity.

Parameters
----------
vdf: vDataFrame
    Input vDataFrame.
eps: str
    Input residual vcolumn.
X: list
    Exogenous Variables to test the heteroscedasticity on.

Returns
-------
tablesample
    An object containing the result. For more information, see
    utilities.tablesample.
    """
    check_types(
        [("eps", eps, [str],), ("X", X, [list],), ("vdf", vdf, [vDataFrame, str,],),],
    )
    columns_check([eps] + X, vdf)
    eps = vdf_columns_names([eps], vdf)[0]
    X = vdf_columns_names(X, vdf)

    from verticapy.learn.linear_model import LinearRegression

    schema_writing = vdf._VERTICAPY_VARIABLES_["schema_writing"]
    if not (schema_writing):
        schema_writing = "public"
    name = schema_writing + ".VERTICAPY_TEMP_MODEL_LINEAR_REGRESSION_{}".format(
        get_session(vdf._VERTICAPY_VARIABLES_["cursor"])
    )
    model = LinearRegression(name, cursor=vdf._VERTICAPY_VARIABLES_["cursor"])
    vdf_copy = vdf.copy()
    vdf_copy["VERTICAPY_TEMP_eps2"] = vdf_copy[eps] ** 2
    try:
        model.fit(vdf_copy, X, "VERTICAPY_TEMP_eps2")
        R2 = model.score("r2")
        model.drop()
    except:
        try:
            model.set_params({"solver": "bfgs"})
            model.fit(vdf_copy, X, "VERTICAPY_TEMP_eps2")
            R2 = model.score("r2")
            model.drop()
        except:
            model.drop()
            raise
    n = vdf.shape()[0]
    k = len(X)
    LM = n * R2
    lm_pvalue = chi2.sf(LM, k)
    F = (n - k - 1) * R2 / (1 - R2) / k
    f_pvalue = f.sf(F, k, n - k - 1)
    result = tablesample(
        {
            "index": [
                "Lagrange Multiplier Statistic",
                "lm_p_value",
                "F Value",
                "f_p_value",
            ],
            "value": [LM, lm_pvalue, F, f_pvalue],
        }
    )
    return result