Example No. 1
def test_leastSquares() -> None:
    """The weights for least squares solution"""
    from resistics.regression.weights import leastSquares, getWeights
    import numpy as np

    data = np.array([1, 2, 3, 4, 5, 6])
    weights = leastSquares(data)
    getweights = getWeights(data, "leastsquares")
    np.testing.assert_equal(weights, np.ones(data.size))
    np.testing.assert_equal(getweights, weights)
Example No. 2
def test_andrewsWave() -> None:
    """The weights for Andrews wave"""
    from resistics.regression.weights import andrewsWave, getWeights
    import statsmodels.api as sm
    import numpy as np

    data = np.array([1, 2, 3, 4, 5, 6])
    weights = andrewsWave(data)
    getweights = getWeights(data, "andrewsWave")
    awave = sm.robust.norms.AndrewWave()
    smweights = awave.weights(data)
    np.testing.assert_equal(weights, smweights)
    np.testing.assert_equal(getweights, smweights)
Example No. 3
def test_trimmedMean() -> None:
    """The weights for trimmed mean"""
    from resistics.regression.weights import trimmedMean, getWeights
    import statsmodels.api as sm
    import numpy as np

    data = np.array([1, 2, 3, 4, 5, 6])
    weights = trimmedMean(data)
    getweights = getWeights(data, "trimmedMean")
    tmean = sm.robust.norms.TrimmedMean()
    smweights = tmean.weights(data)
    np.testing.assert_equal(weights, smweights)
    np.testing.assert_equal(getweights, smweights)
Example No. 4
def test_hampel() -> None:
    """The weights for Hampel"""
    from resistics.regression.weights import hampel, getWeights
    import statsmodels.api as sm
    import numpy as np

    data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    weights = hampel(data)
    getweights = getWeights(data, "hampel")
    hamp = sm.robust.norms.Hampel()
    smweights = hamp.weights(data)
    np.testing.assert_equal(weights, smweights)
    np.testing.assert_equal(getweights, smweights)
Example No. 5
def test_huber() -> None:
    """The weights for Huber"""
    from resistics.regression.weights import huber, getWeights
    import statsmodels.api as sm
    import numpy as np

    data = np.array([1, 2, 3, 4, 5, 6])
    weights = huber(data)
    getweights = getWeights(data, "huber")
    hub = sm.robust.norms.HuberT()
    smweights = hub.weights(data)
    np.testing.assert_equal(weights, smweights)
    np.testing.assert_equal(getweights, smweights)
Example No. 6
def test_bisquare() -> None:
    """The weights for bisquare"""
    from resistics.regression.weights import bisquare, getWeights
    import statsmodels.api as sm
    import numpy as np

    data = np.array([1, 2, 3, 4, 5, 6])
    weights = bisquare(data)
    getweights = getWeights(data, "bisquare")
    bisq = sm.robust.norms.TukeyBiweight()
    smweights = bisq.weights(data)
    np.testing.assert_equal(weights, smweights)
    np.testing.assert_equal(getweights, smweights)
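
The six tests above all exercise the same dispatch: getWeights(data, method) selects a weight function by its method name string. A minimal comparison sketch, assuming only the resistics.regression.weights API shown in the tests (the method strings are copied from them):

def compare_weight_methods() -> None:
    """Print the weight each method assigns to the same residuals."""
    from resistics.regression.weights import getWeights
    import numpy as np

    # standardised residuals, small to large
    resids = np.array([0.1, 0.5, 1.0, 2.0, 5.0, 10.0])
    # method strings as used in the tests above
    methods = ["leastsquares", "huber", "bisquare", "andrewsWave",
               "trimmedMean", "hampel"]
    for method in methods:
        # robust methods should downweight the large residuals;
        # least squares assigns weight 1 everywhere
        print(method, getWeights(resids, method))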
Example No. 7
def chatterjeeMachlerMod(A, y, **kwargs):
    # the weights in chatterjeeMachler are based on the median of the residuals
    # here, use M-estimate weights with a modified residual that includes a
    # measure of leverage: residuals / (1 - p)^2, with p the diagonal of the
    # projection (hat) matrix; capping p below 1 avoids a divide by zero
    from resistics.common.checks import parseKeywords
    from resistics.common.math import eps
    from resistics.regression.moments import getLocation, getScale
    from resistics.regression.weights import getWeights
    from resistics.regression.robust import defaultOptions, applyWeights, olsModel
    import numpy as np
    import numpy.linalg as linalg

    # now calculate p and n
    n = A.shape[0]
    p = A.shape[1]
    pnRatio = 1.0 * p / n

    # calculate the projection matrix
    q, r = linalg.qr(A)
    Pdiag = np.empty(shape=(n), dtype="float")
    for i in range(0, n):
        Pdiag[i] = np.absolute(np.sum(q[i, :] * np.conjugate(q[i, :]))).real
    del q, r
    Pdiag = Pdiag / (np.max(Pdiag) + 1e-10)  # small offset avoids division by zero
    locP = getLocation(Pdiag, "median")
    scaleP = getScale(Pdiag, "mad")
    bound = locP + 6 * scaleP
    indices = np.where(Pdiag > bound)
    Pdiag[indices] = 0.99999
    leverageMeas = np.power(1.0 - Pdiag, 2)

    # weights for the first iteration
    # this is purely based on the leverage
    tmp = np.ones(shape=(n), dtype="float") * pnRatio
    tmp = np.maximum(Pdiag, tmp)
    weights = np.reciprocal(tmp)

    # get options
    options = parseKeywords(defaultOptions(), kwargs, printkw=False)
    # generalPrint("S-Estimate", "Using weight function = {}".format(weightFnc))
    if options["intercept"] == True:
        # add column of ones for constant term
        A = np.hstack((np.ones(shape=(A.shape[0], 1), dtype="complex"), A))

    # iteratively weighted least squares
    iteration = 0
    while iteration < options["maxiter"]:
        # do the weighted least-squares
        Anew, ynew = applyWeights(A, y, weights)
        paramsNew, squareResidNew, rankNew, sNew = linalg.lstsq(Anew,
                                                                ynew,
                                                                rcond=None)
        residsNew = y - np.dot(A, paramsNew)
        # guard against an all-zero residual vector (happens for under- or exactly-determined systems)
        if np.sum(np.absolute(residsNew)) < eps():
            # then return everything here
            return paramsNew, residsNew, weights
        residsNew = residsNew / leverageMeas
        scale = getScale(residsNew, "mad0")

        # standardise and calculate weights
        residsNew = residsNew / scale
        weightsNew = getWeights(residsNew, "huber")
        # increment iteration
        iteration = iteration + 1
        weights = weightsNew
        params = paramsNew

        if iteration > 1:
            # check to see whether the change is smaller than the tolerance
            changeResids = linalg.norm(residsNew -
                                       resids) / linalg.norm(residsNew)
            if changeResids < eps():
                # update resids
                resids = residsNew
                break
        # update resids
        resids = residsNew

    # second stage: repeat the procedure, this time starting from the OLS
    # solution and using trimmed mean weights for the first re-weighting
    params, resids, squareResid, rank, s = olsModel(A, y)
    resids = resids / leverageMeas
    resids = resids / scale
    weights = getWeights(resids, "trimmedMean")
    # iteratively weighted least squares
    iteration = 0
    while iteration < options["maxiter"]:
        # do the weighted least-squares
        Anew, ynew = applyWeights(A, y, weights)
        paramsNew, squareResidNew, rankNew, sNew = linalg.lstsq(Anew,
                                                                ynew,
                                                                rcond=None)
        residsNew = y - np.dot(A, paramsNew)
        # guard against an all-zero residual vector (happens for under- or exactly-determined systems)
        if np.sum(np.absolute(residsNew)) < eps():
            # then return everything here
            return paramsNew, residsNew, weights

        residsNew = residsNew / leverageMeas
        scale = getScale(residsNew, "mad0")

        # standardise and calculate weights
        residsNew = residsNew / scale
        weightsNew = getWeights(residsNew, options["weights"])
        # increment iteration
        iteration = iteration + 1
        weights = weightsNew
        params = paramsNew

        # check to see whether the change is smaller than the tolerance
        changeResids = linalg.norm(residsNew - resids) / linalg.norm(residsNew)
        if changeResids < eps():
            # update resids
            resids = residsNew
            break
        # update resids
        resids = residsNew

    # at the end, return the components
    return params, resids, weights
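
A hypothetical usage sketch for the routine above, on synthetic data; the keyword options (intercept, maxiter, weights) are assumed to be those handled by defaultOptions:

import numpy as np

# synthetic over-determined system with a single outlier in y
rng = np.random.default_rng(0)
A = rng.normal(size=(50, 2))
paramsTrue = np.array([1.0, -2.0])
y = A @ paramsTrue + 0.01 * rng.normal(size=50)
y[10] += 10.0  # a dependent outlier

params, resids, weights = chatterjeeMachlerMod(A, y, intercept=False)
print(params)       # should be close to paramsTrue
print(weights[10])  # the outlier row should receive a low weight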
Example No. 8
import numpy as np
from resistics.common.checks import parseKeywords
from resistics.regression.data import RegressionData
# helper routines from the robust regression module; initialModel is assumed
# to live alongside the helpers that Example No. 7 imports from here
from resistics.regression.robust import (applyWeights, defaultOptions,
                                         initialModel, olsModel)


def mestimateModel(A: np.ndarray, y: np.ndarray, **kwargs) -> RegressionData:
    r"""Mestimate robust least squares

    Solves for :math:`x` where,

    .. math::        
        y = Ax .

    A good method for dependent outliers (in :math:`y`); not robust against independent outliers (leverage points).

    Parameters
    ----------
    A : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    initial : Dict, optional
        Initial model parameters and scale
    scale : optional
        A scale estimate
    intercept : bool, optional
        True or False for adding an intercept term
    weights : str, optional
        The weights to use

    Returns
    -------
    RegressionData
        RegressionData instance with the parameters, residuals, weights and scale
    """
    from resistics.common.math import eps
    from resistics.regression.moments import getScale
    from resistics.regression.weights import getWeights
    import numpy.linalg as linalg

    options = parseKeywords(defaultOptions(), kwargs, printkw=False)
    # calculate the leverage
    n = A.shape[0]
    p = A.shape[1]
    # calculate the projection matrix
    q, r = linalg.qr(A)
    Pdiag = np.empty(shape=(n), dtype="float")
    for ii in range(0, n):
        Pdiag[ii] = np.absolute(np.sum(q[ii, :] * np.conjugate(q[ii, :]))).real
    Pdiag = Pdiag / np.max(Pdiag)
    leverageScale = getScale(Pdiag, "mad0")
    leverageWeights = getWeights(Pdiag / leverageScale, "huber")

    if options["intercept"] == True:
        # add column of ones for constant term
        A = np.hstack((np.ones(shape=(A.shape[0], 1), dtype="complex"), A))

    # see whether to do an initial OLS model or whether one is provided
    if options["initial"]:
        params, resids, scale = initialModel(options["initial"])
    else:
        soln = olsModel(A, y)
        resids = soln.resids
        scale = getScale(resids, "mad0")

    # if an initial model was not provided but an initial scale was, replace the one here
    if options["scale"]:
        scale = options["scale"]

    # standardised residuals and weights
    weights = getWeights(resids / scale, options["weights"]) * leverageWeights
    # iteratively weighted least squares
    iteration = 0
    while iteration < options["maxiter"]:
        # do the weighted least-squares
        Anew, ynew = applyWeights(A, y, weights)
        paramsNew, _squareResidNew, _rankNew, _sNew = linalg.lstsq(Anew,
                                                                   ynew,
                                                                   rcond=None)
        residsNew = y - np.dot(A, paramsNew)

        if np.sum(np.absolute(residsNew)) < eps():
            return RegressionData(A,
                                  y,
                                  params=paramsNew,
                                  resids=residsNew,
                                  scale=scale,
                                  weights=weights)

        # standardise and calculate weights
        scale = getScale(residsNew, "mad0")
        weightsNew = getWeights(residsNew / scale,
                                options["weights"]) * leverageWeights
        # increment iteration and save weightsNew
        iteration = iteration + 1
        weights = weightsNew
        params = paramsNew

        # check to see whether the change is smaller than the tolerance
        # use the R method of checking change in residuals (can check change in params)
        changeResids = linalg.norm(residsNew - resids) / linalg.norm(residsNew)
        if changeResids < eps():
            # update residuals
            resids = residsNew
            break
        # update residuals
        resids = residsNew

    return RegressionData(A,
                          y,
                          params=params,
                          resids=resids,
                          scale=scale,
                          weights=weights)
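
A usage sketch on synthetic data, assuming the RegressionData attributes (params, resids, weights) used in the function body, and the weights and intercept options referenced there:

import numpy as np

# synthetic system with a handful of contaminated observations
rng = np.random.default_rng(0)
A = rng.normal(size=(100, 3))
paramsTrue = np.array([2.0, 0.5, -1.0])
y = A @ paramsTrue + 0.05 * rng.normal(size=100)
y[::20] += 8.0  # contaminate a few observations

soln = mestimateModel(A, y, weights="bisquare", intercept=False)
print(soln.params)   # robust estimate, close to paramsTrue
print(soln.weights)  # contaminated rows should be downweighted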