示例#1
0
 def barplot(self, func=None, *args, **kwargs):
     """
     Plots a bar chart comparing the phases for each period, as transformed
     by the ``func`` function.

     ``self.series``, ``self.cold``, ``self.neutral`` and ``self.warm`` are
     each reduced along axis 0 with ``func`` and drawn as one set of bars;
     successive sets are shifted by one bar width.

     Parameters
     ----------
     func : function, optional
         Function to apply.
         By default, use the :func:`numpy.ma.mean` function.
     args : var
         Mandatory arguments of function ``func``.
     kwargs : var
         Optional arguments of function ``func``.

     Returns
     -------
     barlist : list
         For each of the four sets, the first element of what ``self.bar``
         returned. The same list is stored as ``self.barlist``.

     """
     func = func or ma.mean
     width = 0.2
     colordict = ENSOcolors['fill']
     barlist = []
     pos = self.positions - 3 * width
     for (i, s) in zip(['W', -1, 0, 1],
                       [self.series, self.cold, self.neutral, self.warm]):
         pos += width
         # Forward the caller's extra arguments to ``func``; the previous
         # code referenced an undefined name here and always raised.
         series = ma.apply_along_axis(func, 0, s, *args, **kwargs)
         b = self.bar(pos, series, width=width, bottom=0,
                      color=colordict[i], ecolor='k', capsize=3)
         barlist.append(b[0])
     self.barlist = barlist
     self.figure.axes.append(self)
     self.format_xaxis()
     return barlist
示例#2
0
def hdquantiles_sd(data, prob=list([.25, .5, .75]), axis=None):
    """Computes the standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : ndarray
        Data array. Masked values are discarded.
    prob : sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Flattened array of standard errors. Non-finite entries (for example
        when fewer than two unmasked values are available) are masked.

    Raises
    ------
    ValueError
        If `axis` is not None and `data` has more than two dimensions.

    Notes
    -----
    The function is restricted to 2D arrays.

    """
    def _hdsd_1D(data, prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)
        #.........
        hdsd = np.empty(len(prob), float)
        if n < 2:
            # No leave-one-out sample exists; return NaN explicitly instead
            # of falling through to a division by ``n - 1 == 0``.
            hdsd.flat = np.nan
            return hdsd
        #.........
        vv = np.arange(n) / float(n - 1)
        betacdf = beta.cdf
        #
        for (i, p) in enumerate(prob):
            # Each leave-one-out sample holds n - 1 points, so the
            # Harrell-Davis Beta parameters are ((n - 1) + 1) * p = n * p,
            # matching the (n - 1)-interval grid ``vv``.
            _w = betacdf(vv, n * p, n * (1 - p))
            w = _w[1:] - _w[:-1]
            # Estimate obtained when the k-th order statistic is left out.
            mx_ = np.fromiter([
                np.dot(w[:k], xsorted[:k]) + np.dot(w[k:], xsorted[k + 1:])
                for k in range(n)
            ],
                              dtype=float)
            # Jackknife standard error: sqrt((n - 1) / n * sum of squared
            # deviations) == sqrt(var * (n - 1)).
            hdsd[i] = np.sqrt(mx_.var() * (n - 1))
        return hdsd

    # Initialization & checks ---------
    data = ma.array(data, copy=False, dtype=float)
    # ``copy=False`` is not used here: NumPy >= 2 raises when a list has to
    # be copied into a new array.
    p = np.array(prob, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _hdsd_1D(data, p)
    else:
        if data.ndim > 2:
            raise ValueError(
                "Array 'data' must be at most two dimensional, but got data.ndim = %d"
                % data.ndim)
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)
    #
    return ma.fix_invalid(result, copy=False).ravel()
示例#3
0
def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
    """
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array. Masked values are discarded.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates, flattened.
        Non-finite entries (e.g. fewer than two unmasked values) are masked.

    Raises
    ------
    ValueError
        If `axis` is not None and `data` has more than two dimensions.

    See Also
    --------
    hdquantiles

    """
    def _hdsd_1D(data, prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)

        hdsd = np.empty(len(prob), float)
        if n < 2:
            # Nothing to jackknife: report NaN and stop before dividing by
            # ``n - 1 == 0`` below.
            hdsd.flat = np.nan
            return hdsd

        vv = np.arange(n) / float(n-1)
        betacdf = beta.cdf

        for (i,p) in enumerate(prob):
            # The leave-one-out samples have n - 1 points, hence Beta
            # parameters ((n - 1) + 1) * p = n * p on the grid ``vv``.
            _w = betacdf(vv, n*p, n*(1-p))
            w = _w[1:] - _w[:-1]
            # k-th entry: weighted sum of the order statistics without x[k].
            mx_ = np.fromiter([w[:k] @ xsorted[:k] + w[k:] @ xsorted[k+1:]
                               for k in range(n)], dtype=float)
            hdsd[i] = np.sqrt(mx_.var() * (n - 1))
        return hdsd

    # Initialization & checks
    data = ma.array(data, copy=False, dtype=float)
    # No ``copy=False``: NumPy >= 2 raises when a list must be copied.
    p = np.array(prob, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _hdsd_1D(data, p)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)

    return ma.fix_invalid(result, copy=False).ravel()
示例#4
0
def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
    """
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array. Masked values are discarded.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates, flattened.
        Non-finite entries (e.g. fewer than two unmasked values) are masked.

    Raises
    ------
    ValueError
        If `axis` is not None and `data` has more than two dimensions.

    See Also
    --------
    hdquantiles

    """
    def _hdsd_1D(data, prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)

        hdsd = np.empty(len(prob), float)
        if n < 2:
            # A jackknife needs at least two points; return NaN right away
            # rather than dividing by ``n - 1 == 0`` further down.
            hdsd.flat = np.nan
            return hdsd

        vv = np.arange(n) / float(n-1)
        betacdf = beta.cdf

        for (i,p) in enumerate(prob):
            # Leave-one-out samples contain n - 1 points, so the weights use
            # Beta(n*p, n*(1-p)) evaluated on the (n - 1)-interval grid.
            _w = betacdf(vv, n*p, n*(1-p))
            w = _w[1:] - _w[:-1]
            # Weighted sum of the sorted data with the k-th point removed.
            mx_ = np.fromiter([np.dot(w[:k], xsorted[:k])
                               + np.dot(w[k:], xsorted[k+1:])
                               for k in range(n)], dtype=float)
            # (n-1) * sqrt(var * n/(n-1) / n) simplifies to this expression.
            hdsd[i] = np.sqrt(mx_.var() * (n-1))
        return hdsd

    # Initialization & checks
    data = ma.array(data, copy=False, dtype=float)
    # ``copy=False`` dropped: NumPy >= 2 raises if a list has to be copied.
    p = np.array(prob, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _hdsd_1D(data, p)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)

    return ma.fix_invalid(result, copy=False).ravel()
示例#5
0
def hdquantiles_sd(data, prob=list([0.25, 0.5, 0.75]), axis=None):
    """Computes the standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data: ndarray
        Data array. Masked values are discarded.
    prob: sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Flattened array of standard errors. Non-finite entries (for example
        when fewer than two unmasked values are available) are masked.

    Raises
    ------
    ValueError
        If `axis` is not None and `data` has more than two dimensions.

    Notes
    -----
    The function is restricted to 2D arrays.

    """

    def _hdsd_1D(data, prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)
        # .........
        hdsd = np.empty(len(prob), float)
        if n < 2:
            # Not enough points to leave one out: answer NaN and stop before
            # the division by ``n - 1 == 0``.
            hdsd.flat = np.nan
            return hdsd
        # .........
        vv = np.arange(n) / float(n - 1)
        betacdf = beta.cdf
        #
        for (i, p) in enumerate(prob):
            # Weights for a sample of n - 1 points (one point left out):
            # Beta parameters are ((n - 1) + 1) * p = n * p.
            _w = betacdf(vv, n * p, n * (1 - p))
            w = _w[1:] - _w[:-1]
            # Harrell-Davis estimate with the k-th order statistic removed.
            mx_ = np.fromiter(
                [np.dot(w[:k], xsorted[:k]) + np.dot(w[k:], xsorted[k + 1:]) for k in range(n)], dtype=float
            )
            hdsd[i] = np.sqrt(mx_.var() * (n - 1))
        return hdsd

    # Initialization & checks ---------
    data = ma.array(data, copy=False, dtype=float)
    # ``copy=False`` removed: NumPy >= 2 raises when a list must be copied.
    p = np.array(prob, ndmin=1)
    # Computes quantiles along axis (or globally)
    if axis is None:
        result = _hdsd_1D(data, p)
    else:
        # Raise explicitly: an ``assert`` would vanish under ``python -O``.
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)
    #
    return ma.fix_invalid(result, copy=False).ravel()
def median_cihs(data, alpha=0.05, axis=None):
    """
    Computes the alpha-level confidence interval for the median of the data.

    Uses the Hettmasperger-Sheather method.

    Parameters
    ----------
    data : array_like
        Input data. Masked values are discarded. The input should be 1D only,
        or `axis` should be set to None.
    alpha : float, optional
        Confidence level of the intervals.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    median_cihs
        Alpha level confidence interval: a ``(lower, upper)`` tuple when
        `axis` is None, otherwise the result of applying the 1D computation
        along `axis`.

    Raises
    ------
    ValueError
        If `axis` is not None and `data` has more than two dimensions.

    """

    def _cihs_1D(data, alpha):
        data = np.sort(data.compressed())
        n = len(data)
        alpha = min(alpha, 1 - alpha)
        k = int(binom._ppf(alpha / 2., n, 0.5))
        # Coverage of the interval built from the k-th order statistics.
        gk = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5)
        if gk < 1 - alpha:
            k -= 1
            gk = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5)
        # Coverage of the next narrower interval.
        gkk = binom.cdf(n - k - 1, n, 0.5) - binom.cdf(k, n, 0.5)
        I = (gk - 1 + alpha) / (gk - gkk)
        lambd = (n - k) * I / float(k + (n - 2 * k) * I)
        # Interpolate between the two nested intervals.
        lims = (lambd * data[k] + (1 - lambd) * data[k - 1],
                lambd * data[n - k - 1] + (1 - lambd) * data[n - k])
        return lims

    data = ma.array(data, copy=False)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        # Pass the masked array itself: the helper does the compressing
        # (which also flattens) and needs the ``.compressed`` method.
        result = _cihs_1D(data, alpha)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_cihs_1D, axis, data, alpha)

    return result
示例#7
0
 def get_psi_bar(self, V=None, zpoint='F'):
     """Return the running sum along axis 0 of the axis-2 mean of V.

     ``V`` is multiplied by ``self.dzf`` (broadcast over the last two axes)
     and by ``self.HFacS``, averaged over axis 2, scaled by ``self.Lx`` and
     cumulatively summed along axis 0.

     Parameters
     ----------
     V : array, optional
         Input field. When None it is read with
         ``self.mnc('Tav.nc', 'VVEL', mask=self.HFacS[:])``.
     zpoint : {'F', 'C'}, optional
         ``'F'`` returns the cumulative sum as is. ``'C'`` returns the mean
         of consecutive entries along axis 0 of the stacked array. Any other
         value yields None.
     """
     if V is None:
         V = self.mnc('Tav.nc', 'VVEL', mask=self.HFacS[:])
     layer_flux = V * self.dzf[:, np.newaxis, np.newaxis]
     weighted = layer_flux * self.HFacS
     weighted = ma.mean(weighted, axis=2) * self.Lx
     running = ma.cumsum(weighted, axis=0)
     if zpoint == 'F':
         return running
     if zpoint == 'C':
         # NOTE(review): presumably meant to prepend a row of zeros to the
         # cumulative sum before averaging neighbours; confirm that this
         # apply_along_axis/vstack call does that for the shapes in use.
         stacked = ma.apply_along_axis(np.vstack, 1,
                                       [np.zeros(self.Ny + 1), running])
         return 0.5 * (stacked[1:] + stacked[:-1])
     return None
示例#8
0
def median_cihs(data, alpha=0.05, axis=None):
    """
    Computes the alpha-level confidence interval for the median of the data.

    Uses the Hettmasperger-Sheather method.

    Parameters
    ----------
    data : array_like
        Input data. Masked values are discarded. The input should be 1D only,
        or `axis` should be set to None.
    alpha : float, optional
        Confidence level of the intervals.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    median_cihs
        Alpha level confidence interval: a ``(lower, upper)`` tuple when
        `axis` is None, otherwise the result of applying the 1D computation
        along `axis`.

    Raises
    ------
    ValueError
        If `axis` is not None and `data` has more than two dimensions.

    """

    def _cihs_1D(data, alpha):
        data = np.sort(data.compressed())
        n = len(data)
        alpha = min(alpha, 1 - alpha)
        k = int(binom._ppf(alpha / 2.0, n, 0.5))
        # gk: coverage of the interval based on the k-th order statistics.
        gk = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5)
        if gk < 1 - alpha:
            k -= 1
            gk = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5)
        # gkk: coverage of the next narrower interval.
        gkk = binom.cdf(n - k - 1, n, 0.5) - binom.cdf(k, n, 0.5)
        I = (gk - 1 + alpha) / (gk - gkk)
        lambd = (n - k) * I / float(k + (n - 2 * k) * I)
        # Interpolated lower and upper limits.
        lims = (lambd * data[k] + (1 - lambd) * data[k - 1], lambd * data[n - k - 1] + (1 - lambd) * data[n - k])
        return lims

    data = ma.array(data, copy=False)
    # Computes quantiles along axis (or globally)
    if axis is None:
        # The helper compresses (and thereby flattens) the masked array; it
        # must receive an object that still has ``.compressed``.
        result = _cihs_1D(data, alpha)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, " "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_cihs_1D, axis, data, alpha)

    return result
示例#9
0
def Mstep_part2(X,K,Mu,P,Var,post, minVariance=0.25):
    """Update mixture weights, means and variances from the posteriors.

    Entries of ``X`` equal to 0 are treated as missing and are excluded
    from the mean and variance updates.

    Parameters
    ----------
    X : ndarray, shape (n, d)
        Data matrix; zeros mark missing entries.
    K : int
        Number of mixture components.
    Mu : ndarray, shape (K, d)
        Current means. A coordinate keeps its current value when the
        posterior mass observed for it is below 1.
    P : ndarray
        Current mixture weights. Ignored; the weights are recomputed.
    Var : sequence of length K
        Current variances. Updated in place.
    post : ndarray, shape (n, K)
        Posterior probability of each component for each data point.
    minVariance : float, optional
        Lower bound applied to every updated variance.

    Returns
    -------
    (Mu, P, Var) : tuple
        New means, new mixture weights (column sums of ``post`` divided by
        ``n``) and the updated ``Var`` object.
    """
    n, _ = np.shape(X) # n data points; X must be two dimensional
    mask = (X[:]==0)
    post_T = np.transpose(post)
    X_Cu = ma.array(X,mask=mask)

    N = np.sum(post,axis=0)
    P = np.divide(N,n)

    # update means
    Mu_new = np.dot(post_T,X_Cu)
    # posterior mass of the observed entries, per component and coordinate
    Mu_bot = np.dot(post_T,~mask)
    mask_2 = (Mu_bot[:]<1)
    Mu_bot = ma.array(Mu_bot,mask=mask_2) #masks out values <1
    Mu_new = np.divide(Mu_new, Mu_bot)
    Mu = (Mu * mask_2) + Mu_new.filled(0) #keep original masked out values

    # update variances
    nonzeros = np.apply_along_axis(np.count_nonzero,1,~mask)
    sig_denoms= np.sum(post * np.transpose([nonzeros]),axis=0)

    # ``range`` instead of ``xrange`` so the loop runs on Python 3 as well.
    for j in range(K):
        norm = lambda x: LA.norm(x - Mu[j])**2
        Var[j] = max(minVariance, np.sum(np.multiply(post_T[j],ma.apply_along_axis(norm,1,X_Cu)))/(sig_denoms[j]))

    return (Mu,P,Var)
示例#10
0
def mjci(data, prob=[0.25, 0.5, 0.75], axis=None):
    """
    Returns the Maritz-Jarrett estimators of the standard error of selected
    experimental quantiles of the data.

    Parameters
    ----------
    data: ndarray
        Data array. Masked values are discarded.
    prob: sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    mjci : ndarray or MaskedArray
        One standard-error estimate per entry of `prob` (computed along
        `axis` when it is given).

    Raises
    ------
    ValueError
        If `data` has more than two dimensions.

    """
    def _mjci_1D(data, p):
        data = np.sort(data.compressed())
        n = data.size
        # Rank associated with each requested probability (p * n rounded).
        prob = (np.array(p) * n + 0.5).astype(int)
        betacdf = beta.cdf
        #
        mj = np.empty(len(prob), float)
        x = np.arange(1, n + 1, dtype=float) / n
        y = x - 1. / n
        for (i, m) in enumerate(prob):
            # Weight of each order statistic: Beta(m - 1, n - m) mass on
            # the interval ((k - 1) / n, k / n].
            W = betacdf(x, m - 1, n - m) - betacdf(y, m - 1, n - m)
            C1 = np.dot(W, data)
            C2 = np.dot(W, data**2)
            mj[i] = np.sqrt(C2 - C1**2)
        return mj

    #
    data = ma.array(data, copy=False)
    if data.ndim > 2:
        raise ValueError(
            "Array 'data' must be at most two dimensional, but got data.ndim = %d"
            % data.ndim)
    # ``copy=False`` is not used: NumPy >= 2 raises when the default list
    # has to be copied into a new array.
    p = np.array(prob, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        return _mjci_1D(data, p)
    else:
        return ma.apply_along_axis(_mjci_1D, axis, data, p)
示例#11
0
    def generate_histograms(self):
        """Count, along the last axis of ``self.indices``, how often each bin occurs.

        The counts use ``self.num_bins`` as minimum length and are stored in
        ``self.histograms`` as ``uint16``. When ``self.point_coords`` and
        ``self.environment_coords`` are the same object, ``self.indices`` is
        handled as a masked array and masked entries are not counted.
        ``self.state`` is set to 10 on success.

        Returns
        -------
        The array stored in ``self.histograms``.

        Raises
        ------
        InvalidContextBuilderState
            If ``self.state`` is below 8.
        """
        if self.state < 8:
            raise InvalidContextBuilderState("Cannot generate histograms without indices")

        self.logger.debug("10) Generating histograms")

        if self.point_coords is not self.environment_coords:
            def count_bins(row):
                return np.bincount(row, minlength=self.num_bins)

            counts = np.apply_along_axis(count_bins, -1, self.indices)
            self.histograms = counts.astype(np.uint16)
        else:
            def count_unmasked_bins(row):
                # Drop the masked entries of this row before counting.
                return np.bincount(row[~row.mask], minlength=self.num_bins)

            counts = ma.apply_along_axis(count_unmasked_bins, -1, self.indices)
            self.histograms = counts.data.astype(np.uint16)

        self.state = 10

        return self.histograms
示例#12
0
def mjci(data, prob=[0.25,0.5,0.75], axis=None):
    """
    Returns the Maritz-Jarrett estimators of the standard error of selected
    experimental quantiles of the data.

    Parameters
    ----------
    data: ndarray
        Data array. Masked values are discarded.
    prob: sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    mjci : ndarray or MaskedArray
        One standard-error estimate per entry of `prob` (computed along
        `axis` when it is given).

    Raises
    ------
    ValueError
        If `data` has more than two dimensions.

    """
    def _mjci_1D(data, p):
        data = np.sort(data.compressed())
        n = data.size
        # Rank matching each requested probability (p * n rounded).
        prob = (np.array(p) * n + 0.5).astype(int)
        betacdf = beta.cdf

        mj = np.empty(len(prob), float)
        x = np.arange(1,n+1, dtype=float) / n
        y = x - 1./n
        for (i,m) in enumerate(prob):
            # Beta(m-1, n-m) mass on ((k-1)/n, k/n] weights the k-th
            # order statistic.
            W = betacdf(x,m-1,n-m) - betacdf(y,m-1,n-m)
            C1 = np.dot(W,data)
            C2 = np.dot(W,data**2)
            mj[i] = np.sqrt(C2 - C1**2)
        return mj

    data = ma.array(data, copy=False)
    if data.ndim > 2:
        raise ValueError("Array 'data' must be at most two dimensional, "
                         "but got data.ndim = %d" % data.ndim)

    # No ``copy=False``: NumPy >= 2 raises when the default list must be
    # copied into a new array.
    p = np.array(prob, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        return _mjci_1D(data, p)
    else:
        return ma.apply_along_axis(_mjci_1D, axis, data, p)
示例#13
0
def idealfourths(data, axis=None):
    """Returns an estimate of the lower and upper quartiles of the data along
    the given axis, as computed with the ideal fourths.

    Parameters
    ----------
    data : array_like
        Input array. Masked values are discarded.
    axis : int, optional
        Axis along which the quartiles are estimated. If None, the data are
        flattened.

    Returns
    -------
    idealfourths : list of floats or masked array
        ``[lower, upper]`` when `axis` is None, otherwise the pairs computed
        along `axis`. Both values are NaN when fewer than three unmasked
        values are available.
    """
    def _idf(data):
        x = data.compressed()
        n = len(x)
        if n < 3:
            return [np.nan,np.nan]
        (j,h) = divmod(n/4. + 5/12.,1)
        # divmod returns floats; indices must be integers.
        j = int(j)
        qlo = (1-h)*x[j-1] + h*x[j]
        k = n - j
        qup = (1-h)*x[k] + h*x[k-1]
        return [qlo, qup]
    data = ma.sort(data, axis=axis).view(MaskedArray)
    if (axis is None):
        return _idf(data)
    else:
        return ma.apply_along_axis(_idf, axis, data)
示例#14
0
def idealfourths(data, axis=None):
    """Returns an estimate of the lower and upper quartiles of the data along
    the given axis, as computed with the ideal fourths.

    Parameters
    ----------
    data : array_like
        Input array. Masked values are discarded.
    axis : int, optional
        Axis along which the quartiles are estimated. If None, the data are
        flattened.

    Returns
    -------
    idealfourths : list of floats or masked array
        ``[lower, upper]`` when `axis` is None, otherwise the pairs computed
        along `axis`. Both values are NaN when fewer than three unmasked
        values are available.
    """
    def _idf(data):
        x = data.compressed()
        n = len(x)
        if n < 3:
            return [np.nan,np.nan]
        (j,h) = divmod(n/4. + 5/12.,1)
        # The integer part is used as an index, so convert it from float.
        j = int(j)
        qlo = (1-h)*x[j-1] + h*x[j]
        k = n - j
        qup = (1-h)*x[k] + h*x[k-1]
        return [qlo, qup]
    data = ma.sort(data, axis=axis).view(MaskedArray)
    if (axis is None):
        return _idf(data)
    else:
        return ma.apply_along_axis(_idf, axis, data)
def idealfourths(data, axis=None):
    """
    Estimate the lower and upper quartiles with the ideal fourths algorithm.

    Parameters
    ----------
    data : array_like
        Input array. Masked values are discarded.
    axis : int, optional
        Axis along which the quartiles are estimated. If None, the arrays are
        flattened.

    Returns
    -------
    idealfourths : {list of floats, masked array}
        The two internal values that divide `data` into four parts, computed
        on the flattened array (if `axis` is None) or along `axis` of `data`.
        Both values are NaN when fewer than three unmasked values remain.

    """

    def _idf(data):
        values = data.compressed()
        count = len(values)
        if count < 3:
            return [np.nan, np.nan]
        # Integer part selects the order statistics, fractional part is the
        # interpolation weight.
        (whole, frac) = divmod(count / 4. + 5 / 12., 1)
        lo_idx = int(whole)
        hi_idx = count - lo_idx
        lower = (1 - frac) * values[lo_idx - 1] + frac * values[lo_idx]
        upper = (1 - frac) * values[hi_idx] + frac * values[hi_idx - 1]
        return [lower, upper]

    ordered = ma.sort(data, axis=axis).view(MaskedArray)
    if axis is not None:
        return ma.apply_along_axis(_idf, axis, ordered)
    return _idf(ordered)
示例#16
0
def idealfourths(data, axis=None):
    """
    Return ideal-fourths estimates of the lower and upper quartiles.

    Parameters
    ----------
    data : array_like
        Input array. Masked values are discarded.
    axis : int, optional
        Axis along which the quartiles are estimated. If None, the arrays are
        flattened.

    Returns
    -------
    idealfourths : {list of floats, masked array}
        ``[lower, upper]`` for the flattened array (if `axis` is None) or the
        pairs computed along `axis` of `data`. Both values are NaN when fewer
        than three unmasked values are available.

    """

    def _idf(data):
        sample = data.compressed()
        size = len(sample)
        if not size >= 3:
            return [np.nan, np.nan]
        # Position of the lower fourth: integer index and interpolation
        # fraction.
        position = size / 4.0 + 5 / 12.0
        (index, weight) = divmod(position, 1)
        index = int(index)
        mirror = size - index
        q_low = (1 - weight) * sample[index - 1] + weight * sample[index]
        q_high = (1 - weight) * sample[mirror] + weight * sample[mirror - 1]
        return [q_low, q_high]

    sorted_data = ma.sort(data, axis=axis).view(MaskedArray)
    if axis is None:
        return _idf(sorted_data)
    return ma.apply_along_axis(_idf, axis, sorted_data)
示例#17
0
def median_cihs(data, alpha=0.05, axis=None):
    """Computes the alpha-level confidence interval for the median of the data,
    following the Hettmasperger-Sheather method.

    Parameters
    ----------
    data : sequence
        Input data. Masked values are discarded. The input should be 1D only, or
        axis should be set to None.
    alpha : float
        Confidence level of the intervals.
    axis : integer
        Axis along which to compute the quantiles. If None, use a flattened array.

    Returns
    -------
    median_cihs
        A ``(lower, upper)`` tuple when `axis` is None, otherwise the result
        of applying the 1D computation along `axis`.

    Raises
    ------
    ValueError
        If `axis` is not None and `data` has more than two dimensions.
    """
    def _cihs_1D(data, alpha):
        data = np.sort(data.compressed())
        n = len(data)
        alpha = min(alpha, 1 - alpha)
        k = int(binom._ppf(alpha / 2., n, 0.5))
        # Coverage of the interval built from the k-th order statistics.
        gk = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5)
        if gk < 1 - alpha:
            k -= 1
            gk = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5)
        # Coverage of the next narrower interval.
        gkk = binom.cdf(n - k - 1, n, 0.5) - binom.cdf(k, n, 0.5)
        I = (gk - 1 + alpha) / (gk - gkk)
        lambd = (n - k) * I / float(k + (n - 2 * k) * I)
        # Interpolate between the two nested intervals.
        lims = (lambd * data[k] + (1 - lambd) * data[k - 1],
                lambd * data[n - k - 1] + (1 - lambd) * data[n - k])
        return lims

    data = ma.array(data, copy=False)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        # Hand over the masked array: the helper compresses (and flattens)
        # it and needs the ``.compressed`` method to do so.
        result = _cihs_1D(data, alpha)
    else:
        # Raise explicitly: an ``assert`` would vanish under ``python -O``.
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_cihs_1D, axis, data, alpha)
    #
    return result
示例#18
0
def median_cihs(data, alpha=0.05, axis=None):
    """Computes the alpha-level confidence interval for the median of the data,
    following the Hettmasperger-Sheather method.

    Parameters
    ----------
    data : sequence
        Input data. Masked values are discarded. The input should be 1D only, or
        axis should be set to None.
    alpha : float
        Confidence level of the intervals.
    axis : integer
        Axis along which to compute the quantiles. If None, use a flattened array.

    Returns
    -------
    median_cihs
        A ``(lower, upper)`` tuple when `axis` is None, otherwise the result
        of applying the 1D computation along `axis`.

    Raises
    ------
    ValueError
        If `axis` is not None and `data` has more than two dimensions.
    """

    def _cihs_1D(data, alpha):
        data = np.sort(data.compressed())
        n = len(data)
        alpha = min(alpha, 1 - alpha)
        k = int(binom._ppf(alpha / 2.0, n, 0.5))
        # gk: coverage of the interval based on the k-th order statistics.
        gk = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5)
        if gk < 1 - alpha:
            k -= 1
            gk = binom.cdf(n - k, n, 0.5) - binom.cdf(k - 1, n, 0.5)
        # gkk: coverage of the next narrower interval.
        gkk = binom.cdf(n - k - 1, n, 0.5) - binom.cdf(k, n, 0.5)
        I = (gk - 1 + alpha) / (gk - gkk)
        lambd = (n - k) * I / float(k + (n - 2 * k) * I)
        # Interpolated lower and upper limits.
        lims = (lambd * data[k] + (1 - lambd) * data[k - 1], lambd * data[n - k - 1] + (1 - lambd) * data[n - k])
        return lims

    data = ma.array(data, copy=False)
    # Computes quantiles along axis (or globally)
    if axis is None:
        # The helper compresses (and flattens) the masked array; it must
        # receive an object that still has ``.compressed``.
        result = _cihs_1D(data, alpha)
    else:
        # Raise explicitly: an ``assert`` would vanish under ``python -O``.
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_cihs_1D, axis, data, alpha)
    #
    return result
示例#19
0
 def barplot(self, func=None, *args, **kwargs):
     """
     Plots a bar chart comparing the phases for each period, as transformed
     by the ``func`` function.

     ``self.series``, ``self.cold``, ``self.neutral`` and ``self.warm`` are
     each reduced along axis 0 with ``func`` and drawn as one set of bars;
     successive sets are shifted by one bar width.

     Parameters
     ----------
     func : function, optional
         Function to apply.
         By default, use the :func:`numpy.ma.mean` function.
     args : var
         Mandatory arguments of function ``func``.
     kwargs : var
         Optional arguments of function ``func``.

     Returns
     -------
     barlist : list
         For each of the four sets, the first element of what ``self.bar``
         returned. The same list is stored as ``self.barlist``.

     """
     func = func or ma.mean
     width = 0.2
     colordict = ENSOcolors['fill']
     barlist = []
     pos = self.positions - 3 * width
     for (i, s) in zip(['W', -1, 0, 1],
                       [self.series, self.cold, self.neutral, self.warm]):
         pos += width
         # Pass the caller's extra arguments on to ``func``; the name used
         # here before was never defined, so the call always raised.
         series = ma.apply_along_axis(func, 0, s, *args, **kwargs)
         b = self.bar(pos,
                      series,
                      width=width,
                      bottom=0,
                      color=colordict[i],
                      ecolor='k',
                      capsize=3)
         barlist.append(b[0])
     self.barlist = barlist
     self.figure.axes.append(self)
     self.format_xaxis()
     return barlist
示例#20
0
def hdquantiles_sd(data, prob=list([.25, .5, .75]), axis=None):
    """
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array. Masked values are discarded.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates, flattened.
        Non-finite entries (e.g. fewer than two unmasked values) are masked.

    Raises
    ------
    ValueError
        If `axis` is not None and `data` has more than two dimensions.

    See Also
    --------
    hdquantiles

    """
    def _hdsd_1D(data, prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)

        hdsd = np.empty(len(prob), float)
        if n < 2:
            # Without this return the loop below overwrites the NaNs (a
            # single point would report a standard error of 0.0).
            hdsd.flat = np.nan
            return hdsd

        vv = np.arange(n) / float(n - 1)
        betacdf = beta.cdf

        for (i, p) in enumerate(prob):
            _w = betacdf(vv, n * p, n * (1 - p))
            w = _w[1:] - _w[:-1]
            # cumulative sum of weights and data points if
            # ith point is left out for jackknife
            mx_ = np.zeros_like(xsorted)
            mx_[1:] = np.cumsum(w * xsorted[:-1])
            # similar but from the right
            mx_[:-1] += np.cumsum(w[::-1] * xsorted[:0:-1])[::-1]
            hdsd[i] = np.sqrt(mx_.var() * (n - 1))
        return hdsd

    # Initialization & checks
    data = ma.array(data, copy=False, dtype=float)
    # ``copy=False`` is not used: NumPy >= 2 raises when the default list
    # has to be copied into a new array.
    p = np.array(prob, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _hdsd_1D(data, p)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)

    return ma.fix_invalid(result, copy=False).ravel()
示例#21
0
def quantile(x,
             probs=DEF_PROBS,
             typ=DEF_TYPE,
             method=DEF_METHOD,
             limit=DEF_LIMIT,
             na_rm=DEF_NARM,
             is_sorted=False):
    """Compute the sample quantiles of any vector distribution.

    Parameters
    ----------
    x : pandas DataFrame/Series, ndarray or array_like
        Input data; pandas objects are replaced by their ``.values``, other
        inputs go through ``np.asarray``. At most two dimensions.
    probs : pandas DataFrame/Series, list, tuple or ndarray
        Probabilities, expected in [0, 1]. Values within ``100 * eps`` of
        the bounds are clipped to the nearest bound.
    typ : int
        Quantile algorithm; must be a member of ``TYPES``. The inner helpers
        define the values 1 to 11.
    method : str
        Must be a member of ``METHODS``. ``'DIRECT'`` selects the canonical
        implementation, ``'INHERIT'`` the one modelled on ``mquantiles``.
    limit : list, tuple or ndarray of length 2
        Bounds intended to filter the data (see the review note in the body).
    na_rm : bool
        If True, NaNs are masked; if False, a NaN in `x` raises.
    is_sorted : bool
        If False, the data are compressed and sorted before the computation.

    Returns
    -------
    The quantiles computed by the selected 1D implementation.

    Raises
    ------
    TypeError
        If an argument has an unsupported type.
    ValueError
        If `x` has more than two dimensions, `probs` lies outside [0, 1],
        `typ` or `method` is not recognised, `limit` does not have length 2,
        or `x` contains NaN while `na_rm` is False.

    Examples
    --------
    Call signature::

        quantile(x, probs=DEF_PROBS, typ=DEF_TYPE, method=DEF_METHOD,
                 limit=DEF_LIMIT, na_rm=DEF_NARM, is_sorted=False)
    """

    ## various parameter checkings

    # check the data
    if isinstance(x, (pd.DataFrame, pd.Series)):
        try:
            x = x.values
        except:
            raise TypeError("conversion type error for input dataset")
    elif not isinstance(x, np.ndarray):
        try:
            x = np.asarray(x)
        except:
            raise TypeError("wrong type for input dataset")
    ndim = x.ndim
    if ndim > 2:
        raise ValueError("array should be 2D at most !")

    # check the probs
    if isinstance(probs, (pd.DataFrame, pd.Series)):
        try:
            probs = probs.values
        except:
            raise TypeError("conversion type error for input probabilities")
    elif isinstance(probs, (list, tuple)):
        # NOTE(review): with NumPy >= 2, ``copy=False`` raises ValueError when
        # a list/tuple has to be copied; the bare ``except`` below would then
        # report it as a TypeError - confirm against the NumPy version in use.
        try:
            probs = np.array(probs, copy=False, ndmin=1)
        except:
            raise TypeError("conversion type for error input probabilities")
    elif not isinstance(probs, np.ndarray):
        raise TypeError("wrong type for input probabilities")
    # adjust the values: this is taken from R implementation, where values up to
    # 2e-14 outside that range are accepted and moved to the nearby endpoint
    eps = 100 * np.finfo(np.double).eps
    if (probs < -eps).any() or (probs > 1 + eps).any():
        raise ValueError("probs values outside [0,1]")
    probs = np.maximum(0, np.minimum(1, probs))

    #weights = np.ones(x)
    ## check the weights
    #if isinstance(weights, (pd.DataFrame,pd.Series)):
    #    try:        weights = weights.values
    #    except:     raise TypeError("conversion type error for input weights")
    #elif not isinstance(weights, np.ndarray):
    #    try:        weights = np.asarray(weights)
    #    except:     raise TypeError("wrong type for input weights")
    #if x.shape != weights.shape:
    #    raise ValueError("the length of data and weights must be the same")

    # check parameter typ value
    if typ not in TYPES:
        raise ValueError(
            "typ should be an integer in range [1,{}]!".format(TYPES))

    # check parameter method value
    if method not in METHODS:
        raise ValueError("method should be in {}!".format(METHODS))

    # check parameter is_sorted
    if not isinstance(is_sorted, bool):
        raise TypeError("wrong type for boolean flag is_sorted!")

    # check parameter na_rm
    if not isinstance(na_rm, bool):
        raise TypeError("wrong type for boolean flag na_rm!")

    # check parameter limit
    # NOTE(review): the message below calls ``limit`` a boolean flag although
    # the check requires a list, tuple or ndarray.
    if not isinstance(limit, (list, tuple, np.ndarray)):
        raise TypeError("wrong type for boolean flag limit!")
    if len(limit) != 2:
        raise ValueError("the length of limit must be 2")

    ## algorithm implementation

    def gamma_indice(g, j, typ):
        # Interpolation weight given to the upper order statistic, derived
        # from the fractional part g (and, for typ 3, the parity of j).
        gamma = np.zeros(len(j))
        if typ == 1:
            gamma[np.where(g > 0)] = 1
            # gamma[np.where(g <= 0)] = 0
        elif typ == 2:
            gamma[np.where(g > 0)] = 1
            gamma[np.where(g <= 0)] = 0.5
        elif typ == 3:
            gamma[np.where(np.logical_or(g != 0, j % 2 == 1))] = 1
        elif typ >= 4:
            gamma = g
        return gamma

    def _canonical_quantile1D(typ, sorted_x, probs):
        """Compute the quantile of a 1D numpy array using the canonical/direct
        approach derived from the original algorithms from Hyndman & Fan, Cunane
        and Filliben.
        """
        # inspired by the _quantiles1D function of mquantiles
        N = len(sorted_x)  # sorted_x.count()
        # m: offset added to N * p for the selected typ
        m_indice = lambda p, i: {1: 0, 2: 0, 3: -0.5, 4: 0, 5: 0.5,         \
                                 6: p, 7: 1-p, 8: (p+1)/3 , 9: (2*p+3)/8,   \
                                 10: .4 + .2 * p, 11: .3175 +.365*p}[i]
        # j: integer part of N * p + m; g: its fractional part
        j_indice = lambda p, n, m: np.int_(np.floor(n * p + m))
        g_indice = lambda p, n, m, j: p * n + m - j
        m = m_indice(probs, typ)
        j = j_indice(probs, N, m)
        j_1 = j - 1
        # adjust for the bounds
        j_1[j_1 < 0] = 0
        j[j > N - 1] = N - 1
        x1 = sorted_x[j_1]  # indexes start at 0...
        x2 = sorted_x[j]
        # g is computed from j after it was clipped to N - 1 above
        g = g_indice(probs, N, m, j)
        gamma = gamma_indice(g, j, typ)
        return (1 - gamma) * x1 + gamma * x2

    def _mquantile1D(typ, sorted_x, probs):
        """Compute the quantiles of a 1D numpy array following the implementation
        of the _quantiles1D function of mquantiles.
        source: https://github.com/scipy/scipy/blob/master/scipy/stats/mstats_basic.py
        """
        N = len(
            sorted_x
        )  # sorted_x.count() # though ndarray's have no 'count' attribute
        # no data: fully masked result; one point: that value for every prob
        if N == 0:
            return np_ma.array(np.empty(len(probs), dtype=float), mask=True)
        elif N == 1:
            return np_ma.array(np.resize(sorted_x, probs.shape),
                               mask=np_ma.nomask)
        # note that, wrt to the original implementation (see source code mentioned
        # above), we also added the definitions of (alphap,betap) for typ in [1,2,3]
        abp_indice = lambda typ: {1: (0, 1), 2: (0, 1), 3: (-.5, -1.5), 4: (0, 1),  \
                           5: (.5 , .5),  6: (0 , 0),  7:(1 , 1), 8: (1/3, 1/3),    \
                            9: (3/8 , 3/8), 10: (.4,.4), 11: (.3175, .3175)}[typ]
        alphap, betap = abp_indice(typ)
        m = alphap + probs * (1. - alphap - betap)
        aleph = (probs * N + m)
        # j is clipped to [1, N - 1] so that both j - 1 and j are valid indices
        j = np.floor(aleph.clip(1, N - 1)).astype(int)
        g = (aleph - j).clip(0, 1)
        gamma = gamma_indice(g, j, typ)
        return (1. - gamma) * sorted_x[
            (j - 1).tolist()] + gamma * sorted_x[j.tolist()]

    def _wquantile1D(typ, x, probs, weights):  # not used
        """Compute the weighted quantile of a 1D numpy array.
        """
        # Check the data
        ind_sorted = np.argsort(x)
        sorted_x = x[ind_sorted]
        sorted_weights = weights[ind_sorted]
        # Compute the auxiliary arrays
        Sn = np.cumsum(sorted_weights)
        #assert Sn != 0, "The sum of the weights must not be zero"
        Pn = (Sn - 0.5 * sorted_weights) / np.sum(sorted_weights)
        # Get the value of the weighted median
        return np.interp(probs, Pn, sorted_x)

    ## actual calculation

    # select method
    # NOTE(review): ``_quantile1D`` stays unbound if ``METHODS`` contains a
    # value other than 'DIRECT' or 'INHERIT' - confirm against METHODS.
    if method == 'DIRECT':
        _quantile1D = _canonical_quantile1D

    elif method == 'INHERIT':
        _quantile1D = _mquantile1D

    # define input data
    if na_rm is True:
        data = np_ma.array(x, copy=True, mask=np.isnan(x))
        # weights = np_ma.array(x, copy=True, mask = np.isnan(x))
    elif np.isnan(x).any():
        raise ValueError(
            "missing values and NaN's not allowed if 'na_rm' is FALSE")
    else:
        data = np_ma.array(x, copy=False)

    # filter the input data
    # NOTE(review): ``limit`` was validated above as a list/tuple/ndarray, so
    # ``limit is True`` can never hold and this filter never runs - confirm
    # the intended condition.
    if limit is True:
        condition = (limit[0] < data) & (data < limit[1])
        data[~condition.filled(True)] = np_ma.masked

    # sort if not already the case
    if is_sorted is False:
        # ind_sorted = np.argsort(x)
        # sorted_x = x[ind_sorted]
        # NOTE(review): ``compressed()`` returns a 1D array, so a 2D input is
        # flattened here before sorting.
        sorted_data = np_ma.sort(data.compressed())

    # Computes quantiles along axis (or globally)
    if ndim == 1:
        return _quantile1D(typ, data if is_sorted else sorted_data, probs)
    else:
        # NOTE(review): ``typ`` is passed in the position apply_along_axis
        # uses for the array argument, and the data only as an extra argument
        # of the 1D function - the 2D path looks broken; verify.
        return np_ma.apply_along_axis(_quantile1D, 1, typ,                         \
                                      data if is_sorted else sorted_data, probs)
def hdquantiles(data, prob=list([.25, .5, .75]), axis=None, var=False, ):
    """
    Computes quantile estimates with the Harrell-Davis method.

    The quantile estimates are calculated as a weighted linear combination
    of order statistics.

    Parameters
    ----------
    data : array_like
        Data array. Masked values are ignored.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : bool, optional
        Whether to return the variance of the estimate.

    Returns
    -------
    hdquantiles : MaskedArray
        A (p,) array of quantiles (if `var` is False), or a (2,p) array of
        quantiles and variances (if `var` is True), where ``p`` is the
        number of quantiles. Invalid (NaN) entries are masked.

    Raises
    ------
    ValueError
        If `axis` is not None and `data` has more than two dimensions.

    """

    def _hd_1D(data, prob, var):
        "Computes the HD quantiles for a 1D array. Returns nan for invalid data."
        # Drop the masked entries and work on a plain, sorted ndarray
        xsorted = np.squeeze(np.sort(data.compressed().view(ndarray)))
        # Don't use length here, in case we have a numpy scalar
        n = xsorted.size

        # Row 0 holds the estimates, row 1 their variances
        hd = np.empty((2, len(prob)), float)
        if n < 2:
            hd.flat = np.nan
            return hd if var else hd[0]

        # Weights are the increments of the Beta((n+1)p, (n+1)(1-p)) CDF
        # over the regular grid 0, 1/n, ..., 1
        v = np.arange(n + 1) / float(n)
        betacdf = beta.cdf
        for (i, p) in enumerate(prob):
            _w = betacdf(v, (n + 1) * p, (n + 1) * (1 - p))
            w = _w[1:] - _w[:-1]
            hd_mean = np.dot(w, xsorted)
            hd[0, i] = hd_mean
            hd[1, i] = np.dot(w, (xsorted - hd_mean) ** 2)
        # The extreme probabilities map to the extreme order statistics
        hd[0, prob == 0] = xsorted[0]
        hd[0, prob == 1] = xsorted[-1]
        if var:
            hd[1, prob == 0] = hd[1, prob == 1] = np.nan
            return hd
        return hd[0]

    # Initialization & checks
    data = ma.array(data, copy=False, dtype=float)
    # np.asarray copies only when needed; np.array(..., copy=False) raises
    # on NumPy >= 2 whenever a copy is required (e.g. for a list).
    p = np.atleast_1d(np.asarray(prob))
    # Computes quantiles along axis (or globally)
    if (axis is None) or (data.ndim == 1):
        result = _hd_1D(data, p, var)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hd_1D, axis, data, p, var)

    return ma.fix_invalid(result, copy=False)
示例#23
0
def hdquantiles(
        data,
        prob=list([.25, .5, .75]),
        axis=None,
        var=False,
):
    """Computes quantile estimates with the Harrell-Davis method.

    The estimates are calculated as a weighted linear combination of order
    statistics.

    Parameters
    ----------
    data : ndarray
        Data array. Masked values are ignored.
    prob : sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : boolean
        Whether to return the variance of the estimate.

    Returns
    -------
    A (p,) array of quantiles (if ``var`` is False), or a (2,p) array of
    quantiles and variances (if ``var`` is True), where ``p`` is the number
    of quantiles. Invalid (NaN) entries are masked.

    Raises
    ------
    ValueError
        If ``axis`` is not None and ``data`` has more than two dimensions.

    Notes
    -----
    The function is restricted to 2D arrays.

    """
    def _hd_1D(data, prob, var):
        "Computes the HD quantiles for a 1D array. Returns nan for invalid data."
        # Keep only the unmasked values, as a sorted plain ndarray
        xsorted = np.squeeze(np.sort(data.compressed().view(ndarray)))
        # Don't use length here, in case we have a numpy scalar
        n = xsorted.size
        # First row: estimates; second row: variances
        hd = np.empty((2, len(prob)), float)
        if n < 2:
            hd.flat = np.nan
            return hd if var else hd[0]
        # Weights: increments of the Beta((n+1)p, (n+1)(1-p)) CDF taken on
        # the grid 0, 1/n, ..., 1
        v = np.arange(n + 1) / float(n)
        betacdf = beta.cdf
        for (i, p) in enumerate(prob):
            _w = betacdf(v, (n + 1) * p, (n + 1) * (1 - p))
            w = _w[1:] - _w[:-1]
            hd_mean = np.dot(w, xsorted)
            hd[0, i] = hd_mean
            hd[1, i] = np.dot(w, (xsorted - hd_mean)**2)
        # Probabilities 0 and 1 give the smallest and largest values
        hd[0, prob == 0] = xsorted[0]
        hd[0, prob == 1] = xsorted[-1]
        if var:
            hd[1, prob == 0] = hd[1, prob == 1] = np.nan
            return hd
        return hd[0]

    # Initialization & checks ---------
    data = ma.array(data, copy=False, dtype=float)
    # np.array(..., copy=False) raises on NumPy >= 2 when a copy is needed
    # (e.g. list input); np.asarray copies only if it has to.
    p = np.atleast_1d(np.asarray(prob))
    # Computes quantiles along axis (or globally)
    if (axis is None) or (data.ndim == 1):
        result = _hd_1D(data, p, var)
    else:
        # Explicit exception: an ``assert`` would vanish under ``python -O``
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hd_1D, axis, data, p, var)
    #
    return ma.fix_invalid(result, copy=False)
示例#24
0
def hdquantiles(data, prob=list([0.25, 0.5, 0.75]), axis=None, var=False):
    """
    Computes quantile estimates with the Harrell-Davis method.

    The quantile estimates are calculated as a weighted linear combination
    of order statistics.

    Parameters
    ----------
    data : array_like
        Data array. Masked values are ignored.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : bool, optional
        Whether to return the variance of the estimate.

    Returns
    -------
    hdquantiles : MaskedArray
        A (p,) array of quantiles (if `var` is False), or a (2,p) array of
        quantiles and variances (if `var` is True), where ``p`` is the
        number of quantiles. Invalid (NaN) entries are masked.

    Raises
    ------
    ValueError
        If `axis` is not None and `data` has more than two dimensions.

    """

    def _hd_1D(data, prob, var):
        "Computes the HD quantiles for a 1D array. Returns nan for invalid data."
        # Sorted, unmasked values as a plain ndarray
        xsorted = np.squeeze(np.sort(data.compressed().view(ndarray)))
        # Don't use length here, in case we have a numpy scalar
        n = xsorted.size

        # hd[0] receives the estimates, hd[1] the variances
        hd = np.empty((2, len(prob)), float)
        if n < 2:
            hd.flat = np.nan
            return hd if var else hd[0]

        # Each order statistic is weighted by the increment of the
        # Beta((n+1)p, (n+1)(1-p)) CDF over its cell of the grid i/n
        v = np.arange(n + 1) / float(n)
        betacdf = beta.cdf
        for (i, p) in enumerate(prob):
            _w = betacdf(v, (n + 1) * p, (n + 1) * (1 - p))
            w = _w[1:] - _w[:-1]
            hd_mean = np.dot(w, xsorted)
            hd[0, i] = hd_mean
            hd[1, i] = np.dot(w, (xsorted - hd_mean) ** 2)
        # Probabilities of exactly 0 or 1 return the sample extremes
        hd[0, prob == 0] = xsorted[0]
        hd[0, prob == 1] = xsorted[-1]
        if var:
            hd[1, prob == 0] = hd[1, prob == 1] = np.nan
            return hd
        return hd[0]

    # Initialization & checks
    data = ma.array(data, copy=False, dtype=float)
    # np.asarray instead of np.array(..., copy=False): the latter raises on
    # NumPy >= 2 when the input (e.g. a list) cannot be wrapped without a copy.
    p = np.atleast_1d(np.asarray(prob))
    # Computes quantiles along axis (or globally)
    if (axis is None) or (data.ndim == 1):
        result = _hd_1D(data, p, var)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hd_1D, axis, data, p, var)

    return ma.fix_invalid(result, copy=False)
示例#25
0
def whiskerbox(series,
               fsp=None,
               positions=None,
               mode='mquantiles',
               width=0.8,
               wisk=None,
               plot_mean=False,
               logscale=None,
               color=None,
               outliers=None):
    """
    Draws a whisker plot.
    The bottom and top of the boxes correspond to the lower and upper quartiles
    respectively (25th and 75th percentiles).

    Parameters
    ----------
    series : Sequence
        Input data.
        If the sequence is 2D, each column is assumed to represent a different
        variable.
    fsp : :class:`Subplot`
        Subplot where to draw the data.
        If None, uses the current axes.
    positions : {None, sequence}, optional
        Positions along the x-axis.
        If None, use a scale from 1 to the number of columns.
    mode : {'mquantiles', 'hdquantiles'}, optional
        Type of algorithm used to compute the quantiles.
        If 'hdquantiles', use the Harrell-Davis estimators of
        ``mstats.hdquantiles``; any other value uses ``mstats.mquantiles``.
    width : float, optional
        Width of the boxes.
    wisk : {None, float}, optional
        Whiskers size, as a multiplier of the inter-quartile range.
        If None, the whiskers are drawn between the 5th and 95th percentiles.
    plot_mean : {False, True}, optional
        Whether to overlay the mean on the box.
    logscale : {None, bool}, optional
        If true, the y-axis is set to a logarithmic scale.
    color : {None, string}, optional
        Color of the main box.
    outliers : {dictionary}, optional
        Options for plotting outliers.
        If None or empty, the dictionary
        ``dict(marker='x', ms=4, mfc='#999999', mec='#999999', ls='')``
        is used.

    Returns
    -------
    bars
        The object returned by ``fsp.bar`` for the boxes.

    """
    outliers = outliers or dict(
        marker='x',
        ms=4,
        mfc='#999999',
        mec='#999999',
        ls='',
    )
    if fsp is None:
        fsp = pyplot.gca()
    # Axes without a ``_hold`` attribute (the hold mechanism is gone from
    # recent matplotlib) are treated as held, i.e. they are not cleared.
    if not getattr(fsp, '_hold', True):
        fsp.cla()
    # Make sure the series is a masked array
    series = ma.array(series, copy=False, subok=False)
    # Reshape the series so that each column is one variable
    if series.ndim == 1:
        series = series.reshape(-1, 1)
    elif series.ndim > 2:
        series = np.swapaxes(series, 1, -1).reshape(-1, series.shape[1])
    if positions is None:
        positions = np.arange(1, series.shape[1] + 1)
    else:
        # Allow plain sequences: arithmetic on positions is needed below
        positions = np.asarray(positions)
    # Get the quantiles (series is always at least 2D at this point)
    plist = [0.05, 0.25, 0.5, 0.75, 0.95]
    if mode == 'hdquantiles':
        (qb, ql, qm, qh, qt) = ma.apply_along_axis(mstats.hdquantiles, 0,
                                                   series, plist)
    else:
        (qb, ql, qm, qh, qt) = mstats.mquantiles(series, plist, axis=0)
    # Get the heights, bottoms, and whiskers positions
    heights = qh - ql
    bottoms = ql
    if wisk is not None:
        hival = qh + wisk * heights
        loval = ql - wisk * heights
    else:
        (hival, loval) = (qt, qb)
    half_cap = 0.25 * width
    half_box = 0.5 * width
    # Plot the whiskers and outliers, one column at a time
    for i, pos, xh, xl in np.broadcast(np.arange(len(positions)), positions,
                                       hival, loval):
        x = series[:, i]
        # High whisker: largest value not above the upper limit
        wisk_h = x[(x <= xh).filled(False)]
        wisk_h = max(wisk_h) if len(wisk_h) > 0 else qh[i]
        # Low whisker: smallest value not below the lower limit
        wisk_l = x[(x >= xl).filled(False)]
        wisk_l = min(wisk_l) if len(wisk_l) > 0 else ql[i]
        fsp.plot((pos, pos), (wisk_l, wisk_h), dashes=(1, 1), c='k', zorder=1)
        fsp.plot((pos - half_cap, pos + half_cap), (wisk_l, wisk_l),
                 '-',
                 c='k')
        fsp.plot((pos - half_cap, pos + half_cap), (wisk_h, wisk_h),
                 '-',
                 c='k')
        # Outliers: drawn as soon as there is one on either side
        flh = x[(x > xh).filled(False)].view(ndarray)
        fll = x[(x < xl).filled(False)].view(ndarray)
        extremes = np.r_[flh, fll]
        if len(extremes) > 0:
            fsp.plot([pos] * len(extremes), extremes, **outliers)
        # Plot the median
        fsp.plot((pos - half_box, pos + half_box), (qm[i], qm[i]),
                 ls='-',
                 c='k',
                 lw=1.2,
                 zorder=99)
        # Plot the mean
        if plot_mean:
            fsp.plot((pos - half_box, pos + half_box),
                     (x.mean(), x.mean()),
                     ls=':',
                     dashes=(1, 1),
                     c='#000000',
                     lw=1.1,
                     zorder=99)
    # Plot the boxes. The x values are the left edges, so the alignment is
    # requested explicitly instead of relying on the library default.
    bars = fsp.bar(positions - half_box,
                   heights,
                   width=width,
                   bottom=bottoms,
                   color=color,
                   yerr=None,
                   xerr=None,
                   ecolor='k',
                   capsize=3,
                   zorder=50,
                   align='edge')
    if logscale:
        fsp.set_yscale('log')
    return bars
示例#26
0
def hdquantiles(data, prob=list([0.25, 0.5, 0.75]), axis=None, var=False):
    """Computes quantile estimates with the Harrell-Davis method.

    The estimates are calculated as a weighted linear combination of order
    statistics.

    Parameters
    ----------
    data : ndarray
        Data array. Masked values are ignored.
    prob : sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : boolean
        Whether to return the variance of the estimate.

    Returns
    -------
    A (p,) array of quantiles (if ``var`` is False), or a (2,p) array of
    quantiles and variances (if ``var`` is True), where ``p`` is the number
    of quantiles. Invalid (NaN) entries are masked.

    Raises
    ------
    ValueError
        If ``axis`` is not None and ``data`` has more than two dimensions.

    Notes
    -----
    The function is restricted to 2D arrays.

    """

    def _hd_1D(data, prob, var):
        "Computes the HD quantiles for a 1D array. Returns nan for invalid data."
        # Unmasked values only, sorted, as a plain ndarray
        xsorted = np.squeeze(np.sort(data.compressed().view(ndarray)))
        # Don't use length here, in case we have a numpy scalar
        n = xsorted.size
        # Estimates go to row 0, variances to row 1
        hd = np.empty((2, len(prob)), float)
        if n < 2:
            hd.flat = np.nan
            return hd if var else hd[0]
        # Weights are differences of the Beta((n+1)p, (n+1)(1-p)) CDF
        # between consecutive points of the grid 0, 1/n, ..., 1
        v = np.arange(n + 1) / float(n)
        betacdf = beta.cdf
        for (i, p) in enumerate(prob):
            _w = betacdf(v, (n + 1) * p, (n + 1) * (1 - p))
            w = _w[1:] - _w[:-1]
            hd_mean = np.dot(w, xsorted)
            hd[0, i] = hd_mean
            hd[1, i] = np.dot(w, (xsorted - hd_mean) ** 2)
        # Exact 0 and 1 probabilities return the minimum and the maximum
        hd[0, prob == 0] = xsorted[0]
        hd[0, prob == 1] = xsorted[-1]
        if var:
            hd[1, prob == 0] = hd[1, prob == 1] = np.nan
            return hd
        return hd[0]

    # Initialization & checks ---------
    data = ma.array(data, copy=False, dtype=float)
    # np.array(..., copy=False) raises on NumPy >= 2 if a copy is required
    # (as for a list); np.asarray only copies when necessary.
    p = np.atleast_1d(np.asarray(prob))
    # Computes quantiles along axis (or globally)
    if (axis is None) or (data.ndim == 1):
        result = _hd_1D(data, p, var)
    else:
        # Raise explicitly: an ``assert`` is skipped under ``python -O``
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hd_1D, axis, data, p, var)
    #
    return ma.fix_invalid(result, copy=False)
示例#27
0
def plotting_positions(data, alpha=0.4, beta=0.4, axis=0, masknan=False):
    """Returns the plotting positions (or empirical percentile points) for the
    data.
    Plotting positions are defined as (i-alpha)/(n+1-alpha-beta), where:
        - i is the rank order statistics (starting at 1)
        - n is the number of unmasked values along the given axis
        - alpha and beta are two parameters.

    Typical values for alpha and beta are:
        - (0,1)    : *p(k) = k/n* : linear interpolation of cdf (R, type 4)
        - (.5,.5)  : *p(k) = (k-1/2.)/n* : piecewise linear function (R, type 5)
          (Bliss 1967: "Rankit")
        - (0,0)    : *p(k) = k/(n+1)* : Weibull (R type 6), (Van der Waerden 1952)
        - (1,1)    : *p(k) = (k-1)/(n-1)*. In this case, p(k) = mode[F(x[k])].
          That's R default (R type 7)
        - (1/3,1/3): *p(k) = (k-1/3)/(n+1/3)*. Then p(k) ~ median[F(x[k])].
          The resulting quantile estimates are approximately median-unbiased
          regardless of the distribution of x. (R type 8), (Tukey 1962)
        - (3/8,3/8): *p(k) = (k-3/8)/(n+1/4)*.
          The resulting quantile estimates are approximately unbiased
          if x is normally distributed (R type 9) (Blom 1958)
        - (.4,.4)  : approximately quantile unbiased (Cunnane)
        - (.35,.35): APL, used with PWM

    Parameters
    ----------
    data : array_like
        Input data. Masked arrays are delegated to
        ``stats.mstats.plotting_positions``.
    alpha : {0.4, float} optional
        Plotting positions parameter.
    beta : {0.4, float} optional
        Plotting positions parameter.
    axis : {0, int, None} optional
        Axis along which the positions are computed. If None, the data is
        treated as flattened.
    masknan : {False, True} optional
        If True and the data contains NaNs, the NaNs are masked for the
        computation and the corresponding positions are returned as NaN.

    Returns
    -------
    plpos : ndarray or MaskedArray
        Plotting positions. A masked array is returned only when `data`
        itself is a masked array.

    Notes
    -----
    I think the adjustments assume that there are no ties in order to be a reasonable
    approximation to a continuous density function. TODO: check this

    References
    ----------
    unknown,
    dates to original papers from Beasley, Erickson, Allison 2009 Behav Genet
    """
    def _masked_positions(marr):
        # Shared dispatch for masked input: global/1D call, or per-slice
        # along ``axis``. Uses marr.ndim so it also works when the original
        # ``data`` was a plain sequence without an ``ndim`` attribute.
        if axis is None or marr.ndim == 1:
            return stats.mstats.plotting_positions(marr, alpha=alpha, beta=beta)
        return ma.apply_along_axis(stats.mstats.plotting_positions, axis, marr,
                                   alpha=alpha, beta=beta)

    if isinstance(data, np.ma.MaskedArray):
        return _masked_positions(data)
    if masknan:
        nanmask = np.isnan(data)
        if nanmask.any():
            marr = _masked_positions(ma.array(data, mask=nanmask))
            return ma.filled(marr, fill_value=np.nan)

    data = np.asarray(data)
    if data.size == 1:    # use helper function instead
        data = np.atleast_1d(data)
        axis = 0
    if axis is None:
        data = data.ravel()
        axis = 0
    n = data.shape[axis]
    denom = n + 1. - alpha - beta
    if data.ndim == 1:
        # Scatter the rank-based positions back to the original order
        plpos = np.empty(data.shape, dtype=float)
        plpos[data.argsort()] = (np.arange(1, n + 1) - alpha) / denom
    else:
        # nd assignment instead of second argsort doesn't look easy:
        # a double argsort yields the 0-based rank of each element
        plpos = (data.argsort(axis).argsort(axis) + 1. - alpha) / denom
    return plpos
示例#28
0
def mquantiles(a,
               prob=list([.25, .5, .75]),
               alphap=.4,
               betap=.4,
               axis=None,
               limit=()):
    """
    Computes empirical quantiles for a data array.

    Samples quantile are defined by ``Q(p) = (1-gamma)*x[j] + gamma*x[j+1]``,
    where ``x[j]`` is the j-th order statistic, and gamma is a function of
    ``j = floor(n*p + m)``, ``m = alphap + p*(1 - alphap - betap)`` and
    ``g = n*p + m - j``.

    Reinterpreting the above equations to compare to **R** lead to the
    equation: ``p(k) = (k - alphap)/(n + 1 - alphap - betap)``

    Typical values of (alphap,betap) are:
        - (0,1)    : ``p(k) = k/n`` : linear interpolation of cdf
          (**R** type 4)
        - (.5,.5)  : ``p(k) = (k - 1/2.)/n`` : piecewise linear function
          (**R** type 5)
        - (0,0)    : ``p(k) = k/(n+1)`` :
          (**R** type 6)
        - (1,1)    : ``p(k) = (k-1)/(n-1)``: p(k) = mode[F(x[k])].
          (**R** type 7, **R** default)
        - (1/3,1/3): ``p(k) = (k-1/3)/(n+1/3)``: Then p(k) ~ median[F(x[k])].
          The resulting quantile estimates are approximately median-unbiased
          regardless of the distribution of x.
          (**R** type 8)
        - (3/8,3/8): ``p(k) = (k-3/8)/(n+1/4)``: Blom.
          The resulting quantile estimates are approximately unbiased
          if x is normally distributed
          (**R** type 9)
        - (.4,.4)  : approximately quantile unbiased (Cunnane)
        - (.35,.35): APL, used with PWM

    Parameters
    ----------
    a : array_like
        Input data, as a sequence or array of dimension at most 2.
    prob : array_like, optional
        List of quantiles to compute.
    alphap : float, optional
        Plotting positions parameter, default is 0.4.
    betap : float, optional
        Plotting positions parameter, default is 0.4.
    axis : int, optional
        Axis along which to compute the quantiles.
        If None (default), the input array is first flattened.
    limit : tuple
        Tuple of (lower, upper) values.
        Values of `a` outside this open interval are ignored.

    Returns
    -------
    mquantiles : MaskedArray
        An array containing the calculated quantiles.

    Raises
    ------
    TypeError
        If `a` has more than two dimensions.

    Notes
    -----
    This formulation is very similar to **R** except the calculation of
    ``m`` from ``alphap`` and ``betap``, where in **R** ``m`` is defined
    with each type.

    References
    ----------
    .. [1] *R* statistical software at http://www.r-project.org/

    Examples
    --------
    >>> from scipy.stats.mstats import mquantiles
    >>> a = np.array([6., 47., 49., 15., 42., 41., 7., 39., 43., 40., 36.])
    >>> mquantiles(a)
    array([ 19.2,  40. ,  42.8])

    Using a 2D array, specifying axis and limit.

    >>> data = np.array([[   6.,    7.,    1.],
    ...                  [  47.,   15.,    2.],
    ...                  [  49.,   36.,    3.],
    ...                  [  15.,   39.,    4.],
    ...                  [  42.,   40., -999.],
    ...                  [  41.,   41., -999.],
    ...                  [   7., -999., -999.],
    ...                  [  39., -999., -999.],
    ...                  [  43., -999., -999.],
    ...                  [  40., -999., -999.],
    ...                  [  36., -999., -999.]])
    >>> mquantiles(data, axis=0, limit=(0, 50))
    array([[ 19.2 ,  14.6 ,   1.45],
           [ 40.  ,  37.5 ,   2.5 ],
           [ 42.8 ,  40.05,   3.55]])

    >>> data[:, 2] = -999.
    >>> mquantiles(data, axis=0, limit=(0, 50))
    masked_array(data =
     [[19.2 14.6 --]
     [40.0 37.5 --]
     [42.8 40.05 --]],
                 mask =
     [[False False  True]
      [False False  True]
      [False False  True]],
           fill_value = 1e+20)

    """
    def _quantiles1D(data, m, p):
        # Quantiles of the unmasked values of a 1D masked array
        x = np.sort(data.compressed())
        n = len(x)
        if n == 0:
            # Nothing left: every requested quantile is masked
            return ma.array(np.empty(len(p), dtype=float), mask=True)
        elif n == 1:
            # A single value is every quantile
            return ma.array(np.resize(x, p.shape), mask=nomask)
        aleph = (n * p + m)
        # 1-based index of the lower order statistic, kept within [1, n-1]
        k = np.floor(aleph.clip(1, n - 1)).astype(int)
        gamma = (aleph - k).clip(0, 1)
        return (1. - gamma) * x[(k - 1).tolist()] + gamma * x[k.tolist()]

    # Initialization & checks ---------
    data = ma.array(a, copy=False)
    if data.ndim > 2:
        raise TypeError("Array should be 2D at most !")
    #
    if limit:
        # Mask everything outside the open interval (limit[0], limit[1]).
        # NOTE(review): no copy is requested above, so this may also mask
        # entries of `a` itself when `a` is a masked array -- confirm.
        condition = (limit[0] < data) & (data < limit[1])
        data[~condition.filled(True)] = masked
    #
    # np.asarray copies only when required; np.array(..., copy=False)
    # raises on NumPy >= 2 for inputs such as lists.
    p = np.atleast_1d(np.asarray(prob))
    m = alphap + p * (1. - alphap - betap)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        return _quantiles1D(data, m, p)
    return ma.apply_along_axis(_quantiles1D, axis, data, m, p)
示例#29
0
def whiskerbox(
    series,
    fsp=None,
    positions=None,
    mode="mquantiles",
    width=0.8,
    wisk=None,
    plot_mean=False,
    logscale=None,
    color=None,
    outliers=None,
):
    """
    Draws a whisker plot.
    The bottom and top of the boxes correspond to the lower and upper quartiles
    respectively (25th and 75th percentiles).

    Parameters
    ----------
    series : Sequence
        Input data.
        If the sequence is 2D, each column is assumed to represent a different
        variable.
    fsp : :class:`Subplot`
        Subplot where to draw the data.
        If None, uses the current axes.
    positions : {None, sequence}, optional
        Positions along the x-axis.
        If None, use a scale from 1 to the number of columns.
    mode : {'mquantiles', 'hdquantiles'}, optional
        Type of algorithm used to compute the quantiles.
        If 'hdquantiles', use the Harrell-Davis estimators of
        ``mstats.hdquantiles``; any other value uses ``mstats.mquantiles``.
    width : float, optional
        Width of the boxes.
    wisk : {None, float}, optional
        Whiskers size, as a multiplier of the inter-quartile range.
        If None, the whiskers are drawn between the 5th and 95th percentiles.
    plot_mean : {False, True}, optional
        Whether to overlay the mean on the box.
    logscale : {None, bool}, optional
        If true, the y-axis is set to a logarithmic scale.
    color : {None, string}, optional
        Color of the main box.
    outliers : {dictionary}, optional
        Options for plotting outliers.
        If None or empty, the dictionary
        ``dict(marker='x', ms=4, mfc='#999999', mec='#999999', ls='')``
        is used.

    Returns
    -------
    bars
        The object returned by ``fsp.bar`` for the boxes.

    """
    outliers = outliers or dict(marker="x", ms=4, mfc="#999999", mec="#999999", ls="")
    if fsp is None:
        fsp = pyplot.gca()
    # Axes lacking ``_hold`` (the hold mechanism no longer exists in recent
    # matplotlib) are considered held and are therefore not cleared.
    if not getattr(fsp, "_hold", True):
        fsp.cla()
    # Make sure the series is a masked array
    series = ma.array(series, copy=False, subok=False)
    # Reshape the series: one column per variable
    if series.ndim == 1:
        series = series.reshape(-1, 1)
    elif series.ndim > 2:
        series = np.swapaxes(series, 1, -1).reshape(-1, series.shape[1])
    if positions is None:
        positions = np.arange(1, series.shape[1] + 1)
    else:
        # Plain sequences are accepted; array arithmetic is used further down
        positions = np.asarray(positions)
    # Get the quantiles (the series is at least 2D from here on)
    plist = [0.05, 0.25, 0.5, 0.75, 0.95]
    if mode == "hdquantiles":
        (qb, ql, qm, qh, qt) = ma.apply_along_axis(mstats.hdquantiles, 0, series, plist)
    else:
        (qb, ql, qm, qh, qt) = mstats.mquantiles(series, plist, axis=0)
    # Get the heights, bottoms, and whiskers positions
    heights = qh - ql
    bottoms = ql
    if wisk is not None:
        hival = qh + wisk * heights
        loval = ql - wisk * heights
    else:
        (hival, loval) = (qt, qb)
    cap = 0.25 * width
    half = 0.5 * width
    # Plot the whiskers and outliers, column by column
    for i, pos, xh, xl in np.broadcast(np.arange(len(positions)), positions, hival, loval):
        x = series[:, i]
        # Upper whisker ends at the largest value within the upper limit
        inside_hi = x[(x <= xh).filled(False)]
        wisk_h = max(inside_hi) if len(inside_hi) > 0 else qh[i]
        # Lower whisker ends at the smallest value within the lower limit
        inside_lo = x[(x >= xl).filled(False)]
        wisk_l = min(inside_lo) if len(inside_lo) > 0 else ql[i]
        fsp.plot((pos, pos), (wisk_l, wisk_h), dashes=(1, 1), c="k", zorder=1)
        fsp.plot((pos - cap, pos + cap), (wisk_l, wisk_l), "-", c="k")
        fsp.plot((pos - cap, pos + cap), (wisk_h, wisk_h), "-", c="k")
        # Outliers: plotted whenever at least one exists on either side
        flh = x[(x > xh).filled(False)].view(ndarray)
        fll = x[(x < xl).filled(False)].view(ndarray)
        beyond = np.r_[flh, fll]
        if len(beyond) > 0:
            fsp.plot([pos] * len(beyond), beyond, **outliers)
        # Plot the median
        fsp.plot((pos - half, pos + half), (qm[i], qm[i]), ls="-", c="k", lw=1.2, zorder=99)
        # Plot the mean
        if plot_mean:
            fsp.plot(
                (pos - half, pos + half),
                (x.mean(), x.mean()),
                ls=":",
                dashes=(1, 1),
                c="#000000",
                lw=1.1,
                zorder=99,
            )
    # Plot the boxes. The x values are left edges, so the alignment is stated
    # explicitly rather than left to the library default.
    bars = fsp.bar(
        positions - half,
        heights,
        width=width,
        bottom=bottoms,
        color=color,
        yerr=None,
        xerr=None,
        ecolor="k",
        capsize=3,
        zorder=50,
        align="edge",
    )
    if logscale:
        fsp.set_yscale("log")
    return bars
示例#30
0
def plotting_positions(data, alpha=0.4, beta=0.4, axis=0, masknan=False):
    """Returns the plotting positions (or empirical percentile points) for the
    data.
    Plotting positions are defined as (i-alpha)/(n+1-alpha-beta), where:
        - i is the rank order statistics (starting at 1)
        - n is the number of unmasked values along the given axis
        - alpha and beta are two parameters.

    Typical values for alpha and beta are:
        - (0,1)    : *p(k) = k/n* : linear interpolation of cdf (R, type 4)
        - (.5,.5)  : *p(k) = (k-1/2.)/n* : piecewise linear function (R, type 5)
          (Bliss 1967: "Rankit")
        - (0,0)    : *p(k) = k/(n+1)* : Weibull (R type 6), (Van der Waerden 1952)
        - (1,1)    : *p(k) = (k-1)/(n-1)*. In this case, p(k) = mode[F(x[k])].
          That's R default (R type 7)
        - (1/3,1/3): *p(k) = (k-1/3)/(n+1/3)*. Then p(k) ~ median[F(x[k])].
          The resulting quantile estimates are approximately median-unbiased
          regardless of the distribution of x. (R type 8), (Tukey 1962)
        - (3/8,3/8): *p(k) = (k-3/8)/(n+1/4)*.
          The resulting quantile estimates are approximately unbiased
          if x is normally distributed (R type 9) (Blom 1958)
        - (.4,.4)  : approximately quantile unbiased (Cunnane)
        - (.35,.35): APL, used with PWM

    Parameters
    ----------
    data : array_like
        Input data. Masked arrays are delegated to
        ``stats.mstats.plotting_positions``.
    alpha : {0.4, float} optional
        Plotting positions parameter.
    beta : {0.4, float} optional
        Plotting positions parameter.
    axis : {0, int, None} optional
        Axis along which the positions are computed. If None, the data is
        treated as flattened.
    masknan : {False, True} optional
        If True and the data contains NaNs, the NaNs are masked for the
        computation and the corresponding positions are returned as NaN.

    Returns
    -------
    plpos : ndarray or MaskedArray
        Plotting positions. A masked array is returned only when `data`
        itself is a masked array.

    Notes
    -----
    I think the adjustments assume that there are no ties in order to be a reasonable
    approximation to a continuous density function. TODO: check this

    References
    ----------
    unknown,
    dates to original papers from Beasley, Erickson, Allison 2009 Behav Genet
    """
    def _positions_of_masked(marr):
        # Single place for the masked-array dispatch (global/1D versus
        # per-slice along ``axis``). Relies on marr.ndim, so it does not
        # require the caller's ``data`` to expose an ``ndim`` attribute.
        if axis is None or marr.ndim == 1:
            return stats.mstats.plotting_positions(marr,
                                                   alpha=alpha,
                                                   beta=beta)
        return ma.apply_along_axis(stats.mstats.plotting_positions,
                                   axis,
                                   marr,
                                   alpha=alpha,
                                   beta=beta)

    if isinstance(data, np.ma.MaskedArray):
        return _positions_of_masked(data)
    if masknan:
        nanmask = np.isnan(data)
        if nanmask.any():
            marr = _positions_of_masked(ma.array(data, mask=nanmask))
            return ma.filled(marr, fill_value=np.nan)

    data = np.asarray(data)
    if data.size == 1:  # use helper function instead
        data = np.atleast_1d(data)
        axis = 0
    if axis is None:
        data = data.ravel()
        axis = 0
    n = data.shape[axis]
    scale = n + 1. - alpha - beta
    if data.ndim == 1:
        # Write the rank-based positions back in the original element order
        plpos = np.empty(data.shape, dtype=float)
        plpos[data.argsort()] = (np.arange(1, n + 1) - alpha) / scale
    else:
        # nd assignment instead of second argsort does not look easy:
        # argsort applied twice gives the 0-based rank of each element
        plpos = (data.argsort(axis).argsort(axis) + 1. - alpha) / scale
    return plpos
示例#31
0
文件: mstats.py 项目: Aleyasen/pystan
def mquantiles(a, prob=(.25, .5, .75), alphap=.4, betap=.4, axis=None,
               limit=()):
    """
    Computes empirical quantiles for a data array.

    Samples quantile are defined by ``Q(p) = (1-gamma)*x[j] + gamma*x[j+1]``,
    where ``x[j]`` is the j-th order statistic, and gamma is a function of
    ``j = floor(n*p + m)``, ``m = alphap + p*(1 - alphap - betap)`` and
    ``g = n*p + m - j``.

    Reinterpreting the above equations to compare to **R** lead to the
    equation: ``p(k) = (k - alphap)/(n + 1 - alphap - betap)``

    Typical values of (alphap,betap) are:
        - (0,1)    : ``p(k) = k/n`` : linear interpolation of cdf
          (**R** type 4)
        - (.5,.5)  : ``p(k) = (k - 1/2.)/n`` : piecewise linear function
          (**R** type 5)
        - (0,0)    : ``p(k) = k/(n+1)`` :
          (**R** type 6)
        - (1,1)    : ``p(k) = (k-1)/(n-1)``: p(k) = mode[F(x[k])].
          (**R** type 7, **R** default)
        - (1/3,1/3): ``p(k) = (k-1/3)/(n+1/3)``: Then p(k) ~ median[F(x[k])].
          The resulting quantile estimates are approximately median-unbiased
          regardless of the distribution of x.
          (**R** type 8)
        - (3/8,3/8): ``p(k) = (k-3/8)/(n+1/4)``: Blom.
          The resulting quantile estimates are approximately unbiased
          if x is normally distributed
          (**R** type 9)
        - (.4,.4)  : approximately quantile unbiased (Cunnane)
        - (.35,.35): APL, used with PWM

    Parameters
    ----------
    a : array_like
        Input data, as a sequence or array of dimension at most 2.
        The input (including its mask, if it is a masked array) is not
        modified.
    prob : array_like, optional
        List of quantiles to compute. Default is (0.25, 0.5, 0.75).
    alphap : float, optional
        Plotting positions parameter, default is 0.4.
    betap : float, optional
        Plotting positions parameter, default is 0.4.
    axis : int, optional
        Axis along which to compute the quantiles.
        If None (default), all unmasked values of the input are used
        together, as if the array had been flattened.
    limit : tuple, optional
        Tuple of (lower, upper) values.
        Values of `a` outside this open interval are ignored.
        An empty tuple (default) disables the filtering.

    Returns
    -------
    mquantiles : ndarray or MaskedArray
        An array containing the calculated quantiles. Entries computed
        from a slice with no valid (unmasked, within-limit) value are
        masked.

    Raises
    ------
    TypeError
        If `a` has more than two dimensions.

    Notes
    -----
    This formulation is very similar to **R** except the calculation of
    ``m`` from ``alphap`` and ``betap``, where in **R** ``m`` is defined
    with each type.

    References
    ----------
    .. [1] *R* statistical software at http://www.r-project.org/

    Examples
    --------
    (The exact formatting of the printed arrays depends on the NumPy
    version.)

    >>> import numpy as np
    >>> from scipy.stats.mstats import mquantiles
    >>> a = np.array([6., 47., 49., 15., 42., 41., 7., 39., 43., 40., 36.])
    >>> mquantiles(a)
    array([ 19.2,  40. ,  42.8])

    Using a 2D array, specifying axis and limit.

    >>> data = np.array([[   6.,    7.,    1.],
    ...                  [  47.,   15.,    2.],
    ...                  [  49.,   36.,    3.],
    ...                  [  15.,   39.,    4.],
    ...                  [  42.,   40., -999.],
    ...                  [  41.,   41., -999.],
    ...                  [   7., -999., -999.],
    ...                  [  39., -999., -999.],
    ...                  [  43., -999., -999.],
    ...                  [  40., -999., -999.],
    ...                  [  36., -999., -999.]])
    >>> mquantiles(data, axis=0, limit=(0, 50))
    array([[ 19.2 ,  14.6 ,   1.45],
           [ 40.  ,  37.5 ,   2.5 ],
           [ 42.8 ,  40.05,   3.55]])

    >>> data[:, 2] = -999.
    >>> mquantiles(data, axis=0, limit=(0, 50))
    masked_array(data =
     [[19.2 14.6 --]
     [40.0 37.5 --]
     [42.8 40.05 --]],
                 mask =
     [[False False  True]
      [False False  True]
      [False False  True]],
           fill_value = 1e+20)

    """
    def _quantiles1D(data, m, p):
        # Only the unmasked observations take part in the estimate.
        x = np.sort(data.compressed())
        n = len(x)
        if n == 0:
            # No valid observation: every requested quantile is masked.
            return ma.array(np.empty(len(p), dtype=float), mask=True)
        elif n == 1:
            # A single observation is repeated for every requested quantile.
            return ma.array(np.resize(x, p.shape), mask=nomask)
        aleph = (n*p + m)
        # Clip to [1, n-1] so that both x[k-1] and x[k] are valid indices.
        k = np.floor(aleph.clip(1, n-1)).astype(int)
        gamma = (aleph-k).clip(0, 1)
        # Index with the integer arrays directly (no list round-trip).
        return (1.-gamma)*x[k-1] + gamma*x[k]

    # Initialization & checks ---------
    data = ma.array(a, copy=False)
    if data.ndim > 2:
        raise TypeError("Array should be 2D at most !")
    #
    if limit:
        inside = (limit[0] < data) & (data < limit[1])
        # Already-masked entries are filled with True so they are not
        # flagged again here; they stay masked through the existing mask.
        outside = ~inside.filled(True)
        # Build a brand-new mask rather than assigning `masked` through the
        # copy=False view: the view may share its mask with the caller's
        # array, and writing into it would alter the caller's input.
        data = ma.array(ma.getdata(data),
                        mask=ma.getmaskarray(data) | outside)
    #
    # asarray + atleast_1d instead of np.array(..., copy=False, ndmin=1):
    # on NumPy >= 2.0 copy=False raises when a copy is unavoidable (e.g. a
    # list or tuple of probabilities).
    p = np.atleast_1d(np.asarray(prob))
    m = alphap + p*(1.-alphap-betap)
    # Computes quantiles along axis (or globally)
    if axis is None:
        return _quantiles1D(data, m, p)
    return ma.apply_along_axis(_quantiles1D, axis, data, m, p)