Exemplo n.º 1
0
def dispersion_idx(data,
                   t=None,
                   u=None,
                   umin=None,
                   umax=None,
                   nu=None,
                   nmin=10,
                   tb=1,
                   alpha=0.05,
                   plotflag=False):
    '''Return Dispersion Index vs threshold

    Parameters
    ----------
    data : array_like
        data values.
    t : array_like, optional
        sampled times of data (default arange(len(data))).
    u : array_like, optional
        threshold values (default linspace(umin, umax, nu))
    umin, umax : real scalars
        Minimum and maximum threshold, respectively. Only used when u is
        None. (default min(data) and the nmin'th largest value of data;
        given values are clipped to that range).
    nu : scalar integer
        number of threshold values (default min(N-nmin,100))
    nmin : scalar integer
        Minimum number of extremes to include. (Default 10).
    tb : Real scalar
        Block period (same unit as the sampled times)  (default 1)
    alpha : real scalar
        Confidence coefficient (default 0.05)
    plotflag: bool
        If True the result is plotted before it is returned.

    Returns
    -------
    DI : PlotData object
        Dispersion index as function of threshold. The confidence band is
        stored as a child PlotData object.
    b_u : real scalar or None
        threshold where the number of exceedances in a fixed period (Tb) is
        consistent with a Poisson process: the first accepted threshold
        whose DI exceeds the mean DI of all accepted thresholds.
        None if no threshold is accepted.
    ok_u : array-like or None
        all thresholds where the number of exceedances in a fixed period (Tb)
        is consistent with a Poisson process. None if there are none.

    Notes
    ------
    dispersion_idx estimates the Dispersion Index (DI) as function of
    threshold. DI measures the homogeneity of data and the purpose of DI is
    to determine the threshold where the number of exceedances in a fixed
    period (Tb) is consistent with a Poisson process. For a Poisson process
    the DI is one. Thus the threshold should be so high that DI is not
    significantly different from 1.

    The Poisson hypothesis is not rejected if the estimated DI is between:

    chi2(alpha/2, M-1)/(M-1) < DI < chi2(1 - alpha/2, M-1)/(M-1)

    where M is the total number of fixed periods/blocks -generally
    the total number of years in the sample.

    A threshold without any exceedances gets DI = nan and is never accepted.

    Example
    -------
    >>> import wafo.data
    >>> xn = wafo.data.sea()
    >>> t, data = xn.T
    >>> Ie = findpot(data,t,0,5);
    >>> di, u, ok_u = dispersion_idx(data[Ie],t[Ie],tb=100)
    >>> h = di.plot() # a threshold around 1 seems appropriate.
    >>> round(u*100)/100
    1.03

    vline(u)

    See also
    --------
    reslife,
    fitgenparrange,
    extremal_idx

    References
    ----------
    Ribatet, M. A. (2006),
    A User's Guide to the POT Package (Version 1.0), August,
    http://cran.r-project.org/

    Cunnane, C. (1979) Note on the poisson assumption in
    partial duration series model. Water Resource Research, 15(2):489--494.
    '''

    d = np.asarray(data)
    n = len(data)
    if t is None:
        ti = arange(n)
    else:
        ti = arr(t) - min(t)

    # Block number of every sample
    t1 = np.empty(ti.shape, dtype=int)
    t1[:] = np.floor(ti / tb)

    if u is None:
        sd = np.sort(d)

        nmin = max(nmin, 0)
        if 2 * nmin > n:
            warnings.warn('nmin possibly too large!')

        # sd[-0] is sd[0] (the minimum), so nmin == 0 must pick the last
        # element explicitly.
        sdmax = sd[-nmin] if nmin > 0 else sd[-1]
        sdmin = sd[0]
        umax = sdmax if umax is None else min(umax, sdmax)
        umin = sdmin if umin is None else max(umin, sdmin)

        if nu is None:
            nu = min(n - nmin, 100)

        u = linspace(umin, umax, nu)

    # Accept lists, tuples and scalars as well as arrays
    u = np.atleast_1d(u)
    nu = len(u)

    di = np.zeros(nu)

    mint = int(min(t1))  # mint should be 0.
    maxt = int(max(t1))
    M = maxt - mint + 1
    block_no = t1 - mint

    for ix, tresh in enumerate(u.tolist()):
        excess = (d > tresh)
        n_excess = excess.sum()
        if n_excess == 0:
            # DI is 0/0 without exceedances
            di[ix] = np.nan
            continue
        lambda_ = float(n_excess) / M
        # Number of exceedances in each block
        occ = np.bincount(block_no[excess], minlength=M)
        di[ix] = occ.var() / lambda_

    p = 1 - alpha

    # presumably _invchi2 inverts the upper tail probability so that
    # diLo < diUp -- verify against its definition
    diLo = _invchi2(1 - alpha / 2, M - 1) / (M - 1)
    diUp = _invchi2(alpha / 2, M - 1) / (M - 1)

    # Find appropriate threshold
    k1, = np.where((diLo < di) & (di < diUp))
    if len(k1) > 0:
        ok_u = u[k1]
        b_di = (di[k1].mean() < di[k1])
        k = b_di.argmax()
        b_u = ok_u[k]
    else:
        b_u = ok_u = None

    # round() guards against float round-off such as 57.999... -> 57
    CItxt = '%d%s CI' % (round(100 * p), '%')
    titleTxt = 'Dispersion Index plot'

    # xlab/ylab are the label keywords used by the other PlotData calls
    res = PlotData(di,
                   u,
                   title=titleTxt,
                   xlab='Threshold',
                   ylab='Dispersion Index')
    res.workspace = dict(umin=umin, umax=umax, nu=nu, nmin=nmin, alpha=alpha)
    res.children = [
        PlotData(vstack([diLo * ones(nu), diUp * ones(nu)]).T,
                 u,
                 xlab='Threshold',
                 title=CItxt)
    ]
    res.plot_args_children = ['--r']
    if plotflag:
        # NOTE(review): reslife calls res.plot() without arguments;
        # confirm that passing di here is intended.
        res.plot(di)
    return res, b_u, ok_u
Exemplo n.º 2
0
def dispersion_idx(
    data, t=None, u=None, umin=None, umax=None, nu=None, nmin=10, tb=1,
        alpha=0.05, plotflag=False):
    '''Return Dispersion Index vs threshold

    Parameters
    ----------
    data : array_like
        data values.
    t : array_like, optional
        sampled times of data (default arange(len(data))).
    u : array_like, optional
        threshold values (default linspace(umin, umax, nu))
    umin, umax : real scalars
        Minimum and maximum threshold, respectively. Only used when u is
        None. (default min(data) and the nmin'th largest value of data;
        given values are clipped to that range).
    nu : scalar integer
        number of threshold values (default min(N-nmin,100))
    nmin : scalar integer
        Minimum number of extremes to include. (Default 10).
    tb : Real scalar
        Block period (same unit as the sampled times)  (default 1)
    alpha : real scalar
        Confidence coefficient (default 0.05)
    plotflag: bool
        If True the result is plotted before it is returned.

    Returns
    -------
    DI : PlotData object
        Dispersion index as function of threshold. The confidence band is
        stored as a child PlotData object.
    b_u : real scalar or None
        threshold where the number of exceedances in a fixed period (Tb) is
        consistent with a Poisson process: the first accepted threshold
        whose DI exceeds the mean DI of all accepted thresholds.
        None if no threshold is accepted.
    ok_u : array-like or None
        all thresholds where the number of exceedances in a fixed period (Tb)
        is consistent with a Poisson process. None if there are none.

    Notes
    ------
    dispersion_idx estimates the Dispersion Index (DI) as function of
    threshold. DI measures the homogeneity of data and the purpose of DI is
    to determine the threshold where the number of exceedances in a fixed
    period (Tb) is consistent with a Poisson process. For a Poisson process
    the DI is one. Thus the threshold should be so high that DI is not
    significantly different from 1.

    The Poisson hypothesis is not rejected if the estimated DI is between:

    chi2(alpha/2, M-1)/(M-1) < DI < chi2(1 - alpha/2, M-1)/(M-1)

    where M is the total number of fixed periods/blocks -generally
    the total number of years in the sample.

    A threshold without any exceedances gets DI = nan and is never accepted.

    Example
    -------
    >>> import wafo.data
    >>> xn = wafo.data.sea()
    >>> t, data = xn.T
    >>> Ie = findpot(data,t,0,5);
    >>> di, u, ok_u = dispersion_idx(data[Ie],t[Ie],tb=100)
    >>> h = di.plot() # a threshold around 1 seems appropriate.
    >>> round(u*100)/100
    1.03

    vline(u)

    See also
    --------
    reslife,
    fitgenparrange,
    extremal_idx

    References
    ----------
    Ribatet, M. A. (2006),
    A User's Guide to the POT Package (Version 1.0), August,
    http://cran.r-project.org/

    Cunnane, C. (1979) Note on the poisson assumption in
    partial duration series model. Water Resource Research, 15(2):489--494.
    '''

    d = np.asarray(data)
    n = len(data)
    if t is None:
        ti = arange(n)
    else:
        ti = arr(t) - min(t)

    # Block number of every sample
    t1 = np.empty(ti.shape, dtype=int)
    t1[:] = np.floor(ti / tb)

    if u is None:
        sd = np.sort(d)

        nmin = max(nmin, 0)
        if 2 * nmin > n:
            warnings.warn('nmin possibly too large!')

        # sd[-0] is sd[0] (the minimum), so nmin == 0 must pick the last
        # element explicitly.
        sdmax = sd[-nmin] if nmin > 0 else sd[-1]
        sdmin = sd[0]
        umax = sdmax if umax is None else min(umax, sdmax)
        umin = sdmin if umin is None else max(umin, sdmin)

        if nu is None:
            nu = min(n - nmin, 100)

        u = linspace(umin, umax, nu)

    # Accept lists, tuples and scalars as well as arrays
    u = np.atleast_1d(u)
    nu = len(u)

    di = np.zeros(nu)

    mint = int(min(t1))  # mint should be 0.
    maxt = int(max(t1))
    M = maxt - mint + 1
    block_no = t1 - mint

    for ix, tresh in enumerate(u.tolist()):
        excess = (d > tresh)
        n_excess = excess.sum()
        if n_excess == 0:
            # DI is 0/0 without exceedances
            di[ix] = np.nan
            continue
        lambda_ = float(n_excess) / M
        # Number of exceedances in each block
        occ = np.bincount(block_no[excess], minlength=M)
        di[ix] = occ.var() / lambda_

    p = 1 - alpha

    # presumably _invchi2 inverts the upper tail probability so that
    # diLo < diUp -- verify against its definition
    diLo = _invchi2(1 - alpha / 2, M - 1) / (M - 1)
    diUp = _invchi2(alpha / 2, M - 1) / (M - 1)

    # Find appropriate threshold
    k1, = np.where((diLo < di) & (di < diUp))
    if len(k1) > 0:
        ok_u = u[k1]
        b_di = (di[k1].mean() < di[k1])
        k = b_di.argmax()
        b_u = ok_u[k]
    else:
        b_u = ok_u = None

    # round() guards against float round-off such as 57.999... -> 57
    CItxt = '%d%s CI' % (round(100 * p), '%')
    titleTxt = 'Dispersion Index plot'

    # xlab/ylab are the label keywords used by the other PlotData calls
    res = PlotData(di, u, title=titleTxt,
                   xlab='Threshold', ylab='Dispersion Index')
    res.workspace = dict(umin=umin, umax=umax, nu=nu, nmin=nmin, alpha=alpha)
    res.children = [
        PlotData(vstack([diLo * ones(nu), diUp * ones(nu)]).T, u,
                 xlab='Threshold', title=CItxt)]
    res.plot_args_children = ['--r']
    if plotflag:
        # NOTE(review): reslife calls res.plot() without arguments;
        # confirm that passing di here is intended.
        res.plot(di)
    return res, b_u, ok_u
Exemplo n.º 3
0
def reslife(data,
            u=None,
            umin=None,
            umax=None,
            nu=None,
            nmin=3,
            alpha=0.05,
            plotflag=False):
    '''
    Return Mean Residual Life, i.e., mean excesses vs thresholds

    Parameters
    ---------
    data : array_like
        vector of data of length N.
    u : array_like, optional
        threshold values (default linspace(umin, umax, nu))
    umin, umax : real scalars
        Minimum and maximum threshold, respectively. Only used when u is
        None. (default min(data) and the nmin'th largest value of data;
        given values are clipped to that range).
    nu : scalar integer
        number of threshold values (default min(N-nmin,100))
    nmin : scalar integer
        Minimum number of extremes to include. (Default 3).
    alpha : real scalar
        Confidence coefficient (default 0.05)
    plotflag: bool
        If True the result is plotted before it is returned.

    Returns
    -------
    mrl : PlotData object
        Mean residual life values, i.e., mean excesses over thresholds, u.
        The confidence band is stored as a child PlotData object and the
        number of exceedances per threshold in workspace['numdata'].

    Notes
    -----
    reslife estimates mean excesses over thresholds. The purpose of MRL is
    to determine the threshold where the upper tail of the data can be
    approximated with the generalized Pareto distribution (GPD). The GPD is
    appropriate for the tail, if the MRL is a linear function of the
    threshold, u. Theoretically in the GPD model

        E(X-u0|X>u0) = s0/(1+k)
        E(X-u |X>u)  = s/(1+k) = (s0 -k*u)/(1+k)   for u>u0

    where k,s is the shape and scale parameter, respectively.
    s0 = scale parameter for threshold u0<u.

    A threshold without any exceedances gets a mean excess of nan.

    Example
    -------
    >>> import wafo
    >>> R = wafo.stats.genpareto.rvs(0.1,2,2,size=100)
    >>> mrl = reslife(R,nu=20)
    >>> h = mrl.plot()

    See also
    ---------
    genpareto
    fitgenparrange, dispersion_idx
    '''
    dat = np.asarray(data)

    if u is None:
        sd = np.sort(dat)
        n = len(data)

        nmin = max(nmin, 0)
        if 2 * nmin > n:
            warnings.warn('nmin possibly too large!')

        # sd[-0] is sd[0] (the minimum), so nmin == 0 must pick the last
        # element explicitly.
        sdmax = sd[-nmin] if nmin > 0 else sd[-1]
        sdmin = sd[0]
        umax = sdmax if umax is None else min(umax, sdmax)
        umin = sdmin if umin is None else max(umin, sdmin)

        if nu is None:
            nu = min(n - nmin, 100)

        u = linspace(umin, umax, nu)

    # Accept lists, tuples and scalars as well as arrays
    u = np.atleast_1d(u)
    nu = len(u)

    def _excess_stats(tresh):
        # Mean, standard deviation and count of the excesses over tresh
        excess = dat[dat > tresh] - tresh
        if excess.size == 0:
            return np.nan, np.nan, 0
        # NOTE(review): std() uses numpy's default normalisation (ddof=0);
        # confirm whether ddof=1 is intended for the t-based interval.
        return excess.mean(), excess.std(), excess.size

    stats = [_excess_stats(tresh) for tresh in u.tolist()]
    # reshape keeps the three-column layout even when u is empty
    mrl, srl, num = np.array(stats, dtype=float).reshape(-1, 3).T

    p = 1 - alpha
    alpha2 = alpha / 2

    # Approximate P% confidence interval (unknown mean)
    Za = -_invt(alpha2, num - 1)
    half_width = Za * srl / sqrt(num)
    mrlu = mrl + half_width
    mrll = mrl - half_width

    # round() guards against float round-off such as 57.999... -> 57
    titleTxt = 'Mean residual life with %d%s CI' % (round(100 * p), '%')
    res = PlotData(mrl,
                   u,
                   xlab='Threshold',
                   ylab='Mean Excess',
                   title=titleTxt)
    res.workspace = dict(numdata=num,
                         umin=umin,
                         umax=umax,
                         nu=nu,
                         nmin=nmin,
                         alpha=alpha)
    res.children = [
        PlotData(vstack([mrll, mrlu]).T, u, xlab='Threshold', title=titleTxt)
    ]
    res.plot_args_children = [':r']
    if plotflag:
        res.plot()
    return res
Exemplo n.º 4
0
def reslife(data, u=None, umin=None, umax=None, nu=None, nmin=3, alpha=0.05,
            plotflag=False):
    '''
    Return Mean Residual Life, i.e., mean excesses vs thresholds

    Parameters
    ---------
    data : array_like
        vector of data of length N.
    u : array_like, optional
        threshold values (default linspace(umin, umax, nu))
    umin, umax : real scalars
        Minimum and maximum threshold, respectively. Only used when u is
        None. (default min(data) and the nmin'th largest value of data;
        given values are clipped to that range).
    nu : scalar integer
        number of threshold values (default min(N-nmin,100))
    nmin : scalar integer
        Minimum number of extremes to include. (Default 3).
    alpha : real scalar
        Confidence coefficient (default 0.05)
    plotflag: bool
        If True the result is plotted before it is returned.

    Returns
    -------
    mrl : PlotData object
        Mean residual life values, i.e., mean excesses over thresholds, u.
        The confidence band is stored as a child PlotData object and the
        number of exceedances per threshold in workspace['numdata'].

    Notes
    -----
    reslife estimates mean excesses over thresholds. The purpose of MRL is
    to determine the threshold where the upper tail of the data can be
    approximated with the generalized Pareto distribution (GPD). The GPD is
    appropriate for the tail, if the MRL is a linear function of the
    threshold, u. Theoretically in the GPD model

        E(X-u0|X>u0) = s0/(1+k)
        E(X-u |X>u)  = s/(1+k) = (s0 -k*u)/(1+k)   for u>u0

    where k,s is the shape and scale parameter, respectively.
    s0 = scale parameter for threshold u0<u.

    A threshold without any exceedances gets a mean excess of nan.

    Example
    -------
    >>> import wafo
    >>> R = wafo.stats.genpareto.rvs(0.1,2,2,size=100)
    >>> mrl = reslife(R,nu=20)
    >>> h = mrl.plot()

    See also
    ---------
    genpareto
    fitgenparrange, dispersion_idx
    '''
    dat = np.asarray(data)

    if u is None:
        sd = np.sort(dat)
        n = len(data)

        nmin = max(nmin, 0)
        if 2 * nmin > n:
            warnings.warn('nmin possibly too large!')

        # sd[-0] is sd[0] (the minimum), so nmin == 0 must pick the last
        # element explicitly.
        sdmax = sd[-nmin] if nmin > 0 else sd[-1]
        sdmin = sd[0]
        umax = sdmax if umax is None else min(umax, sdmax)
        umin = sdmin if umin is None else max(umin, sdmin)

        if nu is None:
            nu = min(n - nmin, 100)

        u = linspace(umin, umax, nu)

    # Accept lists, tuples and scalars as well as arrays
    u = np.atleast_1d(u)
    nu = len(u)

    def _excess_stats(tresh):
        # Mean, standard deviation and count of the excesses over tresh
        excess = dat[dat > tresh] - tresh
        if excess.size == 0:
            return np.nan, np.nan, 0
        # NOTE(review): std() uses numpy's default normalisation (ddof=0);
        # confirm whether ddof=1 is intended for the t-based interval.
        return excess.mean(), excess.std(), excess.size

    stats = [_excess_stats(tresh) for tresh in u.tolist()]
    # reshape keeps the three-column layout even when u is empty
    mrl, srl, num = np.array(stats, dtype=float).reshape(-1, 3).T

    p = 1 - alpha
    alpha2 = alpha / 2

    # Approximate P% confidence interval (unknown mean)
    Za = -_invt(alpha2, num - 1)
    half_width = Za * srl / sqrt(num)
    mrlu = mrl + half_width
    mrll = mrl - half_width

    # round() guards against float round-off such as 57.999... -> 57
    titleTxt = 'Mean residual life with %d%s CI' % (round(100 * p), '%')
    res = PlotData(mrl, u, xlab='Threshold',
                   ylab='Mean Excess', title=titleTxt)
    res.workspace = dict(
        numdata=num, umin=umin, umax=umax, nu=nu, nmin=nmin, alpha=alpha)
    res.children = [
        PlotData(vstack([mrll, mrlu]).T, u, xlab='Threshold', title=titleTxt)]
    res.plot_args_children = [':r']
    if plotflag:
        res.plot()
    return res