Example #1
 def setUp(self):
     x = np.linspace(0, np.pi, 5)
     self.d2 = PlotData(np.sin(x),
                        x,
                        xlab='x',
                        ylab='sin',
                        title='sinus',
                        plot_args=['r.'])
     self.x = x
Example #2
File: core.py  Project: morbult/pywafo
def edf(x, method=2):
    """
    Returns Empirical Distribution Function (EDF).

    Parameters
    ----------
    x : array-like
        data vector
    method : integer scalar
        1. Interpolation so that F(X_(k)) == (k-0.5)/n.
        2. Interpolation so that F(X_(k)) == k/(n+1).    (default)
        3. The empirical distribution. F(X_(k)) = k/n

    Examples
    --------
    >>> import numpy as np
    >>> import wafo.stats as ws
    >>> x = np.linspace(0,6,200)
    >>> R = ws.rayleigh.rvs(scale=2,size=100)
    >>> F = ws.edf(R)
    >>> h = F.plot()

    See also
    --------
    pdfplot, cumtrapz
    """
    z = atleast_1d(x).copy()  # sort a copy so the caller's array is not mutated
    z.sort()

    n = len(z)
    if method == 1:
        e_cdf = arange(0.5, n) / n
    elif method == 3:
        e_cdf = arange(1, n + 1) / n
    else:
        e_cdf = arange(1, n + 1) / (n + 1)

    return PlotData(e_cdf, z, xlab='x', ylab='F(x)', plotmethod='step')
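
A minimal sketch (plain numpy, no wafo needed; the sample values are made up) of the three plotting positions edf chooses between:

import numpy as np

z = np.sort(np.array([3.1, 0.2, 1.7, 2.4, 0.9]))
n = len(z)
f1 = np.arange(0.5, n) / n          # method 1: (k - 0.5)/n
f2 = np.arange(1, n + 1) / (n + 1)  # method 2 (default): k/(n + 1)
f3 = np.arange(1, n + 1) / n        # method 3: k/n
print(f1)  # [0.1 0.3 0.5 0.7 0.9]
print(f3)  # [0.2 0.4 0.6 0.8 1. ]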
Example #3
    def prb_search_best(self, prb_e=None, hsvec=None, hsfun='hste',
                        alpha=0.05, color='r', label=''):
        """Return best smoothed binomial probability.

        Parameters
        ----------
        prb_e : PlotData object with empirical binomial probabilities
        hsvec : array-like  (default np.linspace(hsmax*0.2, hsmax, 55))
            vector of candidate smoothing parameters
        hsfun : string
            method for calculating hsmax
        alpha : scalar
            confidence level
        color : str
            color of plot objects
        label : str
            label for plot objects

        """
        if prb_e is None:
            prb_e = self.prb_empirical(alpha=alpha, color=color)
        if hsvec is None:
            hsmax = max(self._get_max_smoothing(hsfun)[0], self.hs_e)
            hsvec = np.linspace(hsmax * 0.2, hsmax, 55)

        hs_best = hsvec[-1] + 0.1
        prb_best = self.prb_smoothed(prb_e, hs_best, alpha, color, label)
        aicc = np.zeros(np.size(hsvec))
        for i, hi in enumerate(hsvec):
            f = self.prb_smoothed(prb_e, hi, alpha, color, label)
            aicc[i] = f.aicc
            if f.aicc <= prb_best.aicc:
                prb_best = f
                hs_best = hi
        prb_best.score = PlotData(aicc, hsvec)
        prb_best.hs = hs_best
        self._set_smoothing(hs_best)
        return prb_best
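
The loop above is a plain grid search: score every candidate smoothing parameter and keep the argmin, seeded with an out-of-range candidate so the first iteration always has something to beat. A self-contained sketch of the same pattern, with a hypothetical score() standing in for prb_smoothed(...).aicc:

import numpy as np

def score(h):
    # hypothetical stand-in for the AICc of the fit at smoothing h
    return (h - 0.7) ** 2 + 1.0

hsvec = np.linspace(0.1, 2.0, 55)
scores = np.array([score(h) for h in hsvec])
i_best = scores.argmin()
hs_best = hsvec[i_best]   # the candidate nearest 0.7 for this toy score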
Example #4
    def __init__(self, *args, **kwds):
        options = dict(
            title='Transform',
            xlab='x',
            ylab='g(x)',
            plot_args=['r'],
            plot_args_children=['g--'],
        )
        options.update(**kwds)
        super(TrData, self).__init__(*args, **options)
        self.ymean = kwds.get('ymean', 0e0)
        self.ysigma = kwds.get('ysigma', 1e0)
        self.mean = kwds.get('mean')
        self.sigma = kwds.get('sigma')

        if self.mean is None:
            # self.mean = np.mean(self.args)
            self.mean = self.gauss2dat(self.ymean)
        if self.sigma is None:
            yp = self.ymean + self.ysigma
            ym = self.ymean - self.ysigma
            self.sigma = (self.gauss2dat(yp) - self.gauss2dat(ym)) / 2.

        self.children = [
            PlotData((self.args - self.mean) / self.sigma, self.args)
        ]
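
The fallback branches derive mean and sigma from the transform itself: the data-scale mean is the image of ymean under the inverse transform, and sigma is half the data-scale spread of the two Gaussian points one ysigma away. A sketch with a hypothetical linear transform g(x) = (x - m)/s, whose inverse gauss2dat(y) = m + s*y recovers m and s exactly:

m, s = 5.0, 2.0
gauss2dat = lambda y: m + s * y   # hypothetical inverse transform
ymean, ysigma = 0.0, 1.0
mean = gauss2dat(ymean)           # 5.0 == m
sigma = (gauss2dat(ymean + ysigma) - gauss2dat(ymean - ysigma)) / 2.  # 2.0 == s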
Example #5
    def prb_smoothed(self, prb_e, hs, alpha=0.05, color='r', label=''):
        """Return smoothed binomial probability.

        Parameters
        ----------
        prb_e : PlotData object with empirical binomial probabilities
        hs : smoothing parameter
        alpha : confidence level
        color : color of plot object
        label : label for plot object

        """

        x_e = prb_e.args
        n_e = len(x_e)
        dx_e = x_e[1] - x_e[0]
        n = self.x.size

        x_s = np.linspace(x_e[0], x_e[-1], 10 * n_e + 1)
        self.hs = hs

        prb_s = self.kreg(x_s, output='plotobj', title='', plot_kwds=dict(
            color=color, linewidth=2))  # dict(plotflag=7))
        m_nan = np.isnan(prb_s.data)
        if m_nan.any():  # assume 0/0 division
            prb_s.data[m_nan] = 0.0

        # prb_s.data[np.isnan(prb_s.data)] = 0
        # expected number of data in each bin
        c_s = self.kreg.tkde.eval_grid_fast(x_s) * dx_e * n
        plo, pup = self.prb_ci(c_s, prb_s.data, alpha)

        prb_s.dataCI = np.vstack((plo, pup)).T
        prb_s.prediction_error_avg = (np.trapz(pup - plo, x_s) /
                                      (x_s[-1] - x_s[0]))

        if label:
            prb_s.plot_kwds['label'] = label
        prb_s.children = [PlotData([plo, pup], x_s,
                                   plotmethod='fill_between',
                                   plot_kwds=dict(alpha=0.2, color=color)),
                          prb_e]

        p_e = prb_e.eval_points(x_s)
        p_s = prb_s.data
        dp_s = np.sign(np.diff(p_s))
        k = (dp_s[:-1] != dp_s[1:]).sum()  # numpeaks

        sigmai = _logit(pup) - _logit(plo) + _EPS
        aicc = ((((_logit(p_e) - _logit(p_s)) / sigmai) ** 2).sum() +
                2 * k * (k + 1) / np.maximum(n_e - k + 1, 1) +
                np.abs((p_e - pup).clip(min=0) -
                       (p_e - plo).clip(max=0)).sum())

        prb_s.aicc = aicc
        return prb_s
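
Two details of the AICc above are easy to miss: the residuals are compared on the log-odds scale, and the penalty counts k as the number of direction changes in the smoothed curve. A sketch, assuming _logit is the usual log-odds log(p/(1-p)):

import numpy as np

def _logit(p):
    # assumed definition: the usual log-odds
    return np.log(p) - np.log1p(-p)

p_s = np.array([0.1, 0.3, 0.2, 0.4, 0.35])
dp_s = np.sign(np.diff(p_s))
k = (dp_s[:-1] != dp_s[1:]).sum()
print(k)  # 3: the sign of the slope flips three times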
Example #6
File: kdetools.py  Project: morbult/pywafo
    def prb_empirical(self, xi=None, hs_e=None, alpha=0.05, color='r', **kwds):
        """Returns empirical binomial probabiltity.

        Parameters
        ----------
        xi : ndarray, optional
            grid positions (default self.get_grid(hs_e))
        hs_e : scalar, optional
            smoothing parameter used to build the default grid
        alpha : scalar
            confidence level
        color :
            used in plot

        Returns
        -------
        prb_e : PlotData object
            empirical probability at the grid points, with the confidence
            band in prb_e.dataCI and the bin counts in prb_e.count

        """
        if xi is None:
            xi = self.get_grid(hs_e)

        x = self.x
        y = self.y

        c = gridcount(x, xi)  # + self.a + self.b # count data
        if np.any(y == 1):
            c0 = gridcount(x[y == 1], xi)  # + self.a # count success
        else:
            c0 = np.zeros(np.shape(xi))
        prb = np.where(c == 0, 0, c0 / (c + _TINY))  # assume prb==0 for c==0
        CI = np.vstack(self.prb_ci(c, prb, alpha))

        prb_e = PlotData(prb,
                         xi,
                         plotmethod='plot',
                         plot_args=['.'],
                         plot_kwds=dict(markersize=6, color=color, picker=5))
        prb_e.dataCI = CI.T
        prb_e.count = c
        return prb_e
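
With np.histogram on bin edges as a crude stand-in for gridcount, the same estimate is successes over trials per bin, guarded against 0/0. A minimal sketch on synthetic data:

import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(0, 1, 200)
y = (rng.uniform(0, 1, 200) < x).astype(int)   # success probability grows with x

edges = np.linspace(0, 1, 11)
c, _ = np.histogram(x, edges)                  # trials per bin
c0, _ = np.histogram(x[y == 1], edges)         # successes per bin
tiny = np.finfo(float).tiny
prb = np.where(c == 0, 0, c0 / (c + tiny))     # empirical P(success) per bin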
Example #7
 def _make_object(self, f, **kwds):
     titlestr = 'Kernel density estimate ({})'.format(self.kernel.name)
     kwds2 = dict(title=titlestr)
     kwds2['plot_kwds'] = dict(plotflag=1)
     kwds2.update(**kwds)
     args = self.args
     if self.d == 1:
         args = args[0]
     wdata = PlotData(f, args, **kwds2)
     if self.d > 1:
         self._add_contour_levels(wdata)
     return wdata
Example #8
File: core.py  Project: BackupGGCode/pywafo
def edf(x, method=2):
    '''
    Returns Empirical Distribution Function (EDF).

    Parameters
    ----------
    x : array-like
        data vector
    method : integer scalar
        1. Interpolation so that F(X_(k)) == (k-0.5)/n.
        2. Interpolation so that F(X_(k)) == k/(n+1).    (default)
        3. The empirical distribution. F(X_(k)) = k/n

    Example
    -------
    >>> import numpy as np
    >>> import wafo.stats as ws
    >>> x = np.linspace(0,6,200)
    >>> R = ws.rayleigh.rvs(scale=2,size=100)
    >>> F = ws.edf(R)
    >>> h = F.plot()

    See also
    --------
    pdfplot, cumtrapz
    '''
    z = atleast_1d(x).copy()  # sort a copy so the caller's array is not mutated
    z.sort()

    N = len(z)
    if method == 1:
        Fz1 = arange(0.5, N) / N
    elif method == 3:
        Fz1 = arange(1, N + 1) / N
    else:
        Fz1 = arange(1, N + 1) / (N + 1)

    F = PlotData(Fz1, z, xlab='x', ylab='F(x)')
    F.setplotter('step')
    return F
예제 #9
0
def edf(x, method=2):
    '''
    Returns Empirical Distribution Function (EDF).

    Parameters
    ----------
    x : array-like
        data vector
    method : integer scalar
        1. Interpolation so that F(X_(k)) == (k-0.5)/n.
        2. Interpolation so that F(X_(k)) == k/(n+1).    (default)
        3. The empirical distribution. F(X_(k)) = k/n

    Example
    -------
    >>> import wafo.stats as ws
    >>> x = np.linspace(0,6,200)
    >>> R = ws.rayleigh.rvs(scale=2,size=100)
    >>> F = ws.edf(R)
    >>> h = F.plot()

     See also edf, pdfplot, cumtrapz
    '''
    z = atleast_1d(x)
    z.sort()

    N = len(z)
    if method == 1:
        Fz1 = arange(0.5, N) / N
    elif method == 3:
        Fz1 = arange(1, N + 1) / N
    else:
        Fz1 = arange(1, N + 1) / (N + 1)

    F = PlotData(Fz1, z, xlab='x', ylab='F(x)')
    F.setplotter('step')
    return F
Example #9
# imports assumed to make the snippet self-contained; in pywafo, PlotData is
# expected to live in wafo.containers (an assumption about the project layout)
import unittest

import numpy as np
from numpy.testing import assert_array_almost_equal

from wafo.containers import PlotData


class TestPlotData(unittest.TestCase):

    def setUp(self):
        x = np.linspace(0, np.pi, 5)
        self.d2 = PlotData(np.sin(x), x,
                           xlab='x', ylab='sin', title='sinus',
                           plot_args=['r.'])
        self.x = x

    def tearDown(self):
        pass

    def test_copy(self):
        d3 = self.d2.copy()  # shallow copy
        self.d2.args = None
        assert_array_almost_equal(d3.args, self.x)

    def test_labels_str(self):
        txt = str(self.d2.labels)
        self.assertEqual(txt,
                         'AxisLabels(title=sinus, xlab=x, ylab=sin, zlab=)')
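
With the imports added above, the case runs under the standard unittest runner:

if __name__ == '__main__':
    unittest.main()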
Example #10
    def tocovdata(self, timeseries):
        """
        Return auto covariance function from data.

        Parameters
        ----------
        timeseries : TimeSeries object or 2d array
            the data; an n x 2 array is interpreted as (time, value) columns.

        Returns
        -------
        acf : CovData1D object
            with attributes:
            data : ACF vector, length L+1
            args : time lags, length L+1
            sigma : estimated large-lag standard deviation of the estimate,
                    assuming x is a Gaussian process. If acf[k] = 0 for all
                    lags k > q, then Bartlett's large-sample approximation of
                    the variance is
                        var(acf[k]) = (acf[0]**2 + 2*acf[1]**2 + 2*acf[2]**2
                                       + ... + 2*acf[q]**2) / N
                    for k > q, where N = length(x). The special case of
                    white noise gives var(acf[k]) = acf[0]**2 / N for k > 0.
            norm : bool
                False indicates that auto_cov is not normalized.

        Example
        -------
         >>> import wafo.data
         >>> import wafo.objects as wo
         >>> x = wafo.data.sea()
         >>> ts = wo.mat2timeseries(x)
         >>> acf = ts.tocovdata(150)

         h = acf.plot()
        """
        lag = self.lag
        window = self.window
        detrend = self.detrend

        try:
            x = timeseries.data.flatten('F')
            dt = timeseries.sampling_period()
        except Exception:
            x = timeseries[:, 1:].flatten('F')
            dt = sampling_period(timeseries[:, 0])
        if self.dt is not None:
            dt = self.dt

        if self.tr is not None:
            x = self.tr.dat2gauss(x)

        n = len(x)
        indnan = np.isnan(x)
        if indnan.any():
            x = x - x[~indnan].mean()  # mean over the non-NaN values
            Ncens = n - indnan.sum()
            x[indnan] = 0.
        else:
            Ncens = n
            x = x - x.mean()
        if hasattr(detrend, '__call__'):
            x = detrend(x)

        nfft = 2 ** nextpow2(n)
        raw_periodogram = abs(fft(x, nfft)) ** 2 / Ncens
        # ifft = fft/nfft since raw_periodogram is real!
        auto_cov = np.real(fft(raw_periodogram)) / nfft

        if self.flag.startswith('unbiased'):
            # unbiased result, i.e. divide by n - abs(lag);
            # divisors run Ncens..1 so the shapes match
            auto_cov = auto_cov[:Ncens] * Ncens / np.arange(Ncens, 0, -1)

        if self.norm:
            auto_cov = auto_cov / auto_cov[0]

        if lag is None:
            lag = self._estimate_lag(auto_cov, Ncens)
        lag = min(lag, n - 2)
        if isinstance(window, str) or type(window) is tuple:
            win = get_window(window, 2 * lag - 1)
        else:
            win = np.asarray(window)
        auto_cov[:lag] = auto_cov[:lag] * win[lag - 1::]
        auto_cov[lag] = 0
        lags = slice(0, lag + 1)
        t = np.linspace(0, lag * dt, lag + 1)
        acf = CovData1D(auto_cov[lags], t)
        acf.sigma = np.sqrt(np.r_[0, auto_cov[0] ** 2,
                            auto_cov[0] ** 2 + 2 * np.cumsum(auto_cov[1:] ** 2)] / Ncens)
        acf.children = [PlotData(-2. * acf.sigma[lags], t),
                        PlotData(2. * acf.sigma[lags], t)]
        acf.plot_args_children = ['r:']
        acf.norm = self.norm
        return acf
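
The numerical core is the Wiener-Khinchin route: zero-pad, take |FFT|^2, and transform back, giving the autocovariance in O(n log n) instead of O(n^2). A self-contained sketch of just that step (padding to at least 2n-1 samples to avoid circular wrap-around, biased normalization), checked against the direct definition:

import numpy as np

def acov_fft(x):
    x = np.asarray(x, float)
    x = x - x.mean()
    n = len(x)
    nfft = 2 ** int(np.ceil(np.log2(2 * n - 1)))  # pad so the FFT does not wrap
    p = np.abs(np.fft.fft(x, nfft)) ** 2          # raw power spectrum
    return np.real(np.fft.ifft(p))[:n] / n        # biased ACF estimate

x = np.random.default_rng(1).standard_normal(200)
ref = np.correlate(x - x.mean(), x - x.mean(), 'full')[len(x) - 1:] / len(x)
assert np.allclose(acov_fft(x), ref)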
Example #11
File: core.py  Project: BackupGGCode/pywafo
def dispersion_idx(data, t=None, u=None, umin=None, umax=None, nu=None,
                   nmin=10, tb=1, alpha=0.05, plotflag=False):
    '''Return Dispersion Index vs threshold

    Parameters
    ----------
    data, t : array_like
        data values and sampled times, respectively.
    u :  array-like
        threshold values (default linspace(umin, umax, nu))
    umin, umax : real scalars
        Minimum and maximum threshold, respectively
        (default min(data), max(data)).
    nu : scalar integer
        number of threshold values (default min(N-nmin,100))
    nmin : scalar integer
        Minimum number of extremes to include. (Default 10).
    tb : Real scalar
        Block period (same unit as the sampled times)  (default 1)
    alpha : real scalar
        Confidence coefficient (default 0.05)
    plotflag: bool

    Returns
    -------
    DI : PlotData object
        Dispersion index
    b_u : real scalar
        threshold where the number of exceedances in a fixed period (Tb) is
        consistent with a Poisson process.
    ok_u : array-like
        all thresholds where the number of exceedances in a fixed period (Tb)
        is consistent with a Poisson process.

    Notes
    ------
    DISPRSNIDX estimates the Dispersion Index (DI) as a function of threshold.
    DI measures the homogeneity of the data; its purpose is to determine
    the threshold where the number of exceedances in a fixed period (Tb) is
    consistent with a Poisson process. For a Poisson process the DI is one.
    Thus the threshold should be high enough that DI is not significantly
    different from 1.

    The Poisson hypothesis is not rejected if the estimated DI is between:

    chi2(alpha/2, M-1)/(M-1) < DI < chi2(1-alpha/2, M-1)/(M-1)

    where M is the total number of fixed periods/blocks, generally
    the total number of years in the sample.

    Example
    -------
    >>> import wafo.data
    >>> xn = wafo.data.sea()
    >>> t, data = xn.T
    >>> Ie = findpot(data,t,0,5);
    >>> di, u, ok_u = dispersion_idx(data[Ie],t[Ie],tb=100)
    >>> h = di.plot() # a threshold around 1 seems appropriate.
    >>> round(u*100)/100
    1.03

    vline(u)

    See also
    --------
    reslife,
    fitgenparrange,
    extremal_idx

    References
    ----------
    Ribatet, M. A. (2006)
    A User's Guide to the POT Package (Version 1.0), August.
    http://cran.r-project.org/

    Cunnane, C. (1979) Note on the Poisson assumption in
    partial duration series model. Water Resources Research, 15(2),
    489--494.
    '''

    n = len(data)
    if t is None:
        ti = arange(n)
    else:
        ti = arr(t) - min(t)

    t1 = np.empty(ti.shape, dtype=int)
    t1[:] = np.floor(ti / tb)

    if u is None:
        sd = np.sort(data)

        nmin = max(nmin, 0)
        if 2 * nmin > n:
            warnings.warn('nmin possibly too large!')

        sdmax, sdmin = sd[-nmin], sd[0]
        umax = sdmax if umax is None else min(umax, sdmax)
        umin = sdmin if umin is None else max(umin, sdmin)

        if nu is None:
            nu = min(n - nmin, 100)

        u = linspace(umin, umax, nu)

    nu = len(u)

    di = np.zeros(nu)

    d = arr(data)

    mint = int(min(t1))  # mint should be 0
    maxt = int(max(t1))
    M = maxt - mint + 1
    occ = np.zeros(M)

    for ix, tresh in enumerate(u.tolist()):
        excess = (d > tresh)
        lambda_ = excess.sum() / M
        for block in range(M):
            occ[block] = sum(excess[t1 == block])

        di[ix] = occ.var() / lambda_

    p = 1 - alpha

    diLo = _invchi2(1 - alpha / 2, M - 1) / (M - 1)
    diUp = _invchi2(alpha / 2, M - 1) / (M - 1)

    # Find appropriate threshold
    k1, = np.where((diLo < di) & (di < diUp))
    if len(k1) > 0:
        ok_u = u[k1]
        b_di = (di[k1].mean() < di[k1])
        k = b_di.argmax()
        b_u = ok_u[k]
    else:
        b_u = ok_u = None

    CItxt = '%d%s CI' % (100 * p, '%')
    titleTxt = 'Dispersion Index plot'

    res = PlotData(di, u, title=titleTxt,
                   xlab='Threshold', ylab='Dispersion Index')
    res.workspace = dict(umin=umin, umax=umax, nu=nu, nmin=nmin, alpha=alpha)
    res.children = [
        PlotData(vstack([diLo * ones(nu), diUp * ones(nu)]).T, u,
                 xlab='Threshold', title=CItxt)]
    res.plot_args_children = ['--r']
    if plotflag:
        res.plot(di)
    return res, b_u, ok_u
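
The dispersion index itself is just the variance-to-mean ratio of per-block exceedance counts, which is 1 for a Poisson process. A quick sanity check on synthetic Poisson counts, using scipy.stats.chi2 directly for the acceptance band:

import numpy as np
from scipy.stats import chi2

rng = np.random.default_rng(0)
M = 200                          # number of blocks
occ = rng.poisson(3.0, M)        # exceedance counts per block
di = occ.var() / occ.mean()      # dispersion index, ~1 for Poisson data
lo = chi2.ppf(0.025, M - 1) / (M - 1)
up = chi2.ppf(0.975, M - 1) / (M - 1)
print(lo < di < up)              # True for most seeds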
Example #12
File: core.py  Project: BackupGGCode/pywafo
def reslife(data, u=None, umin=None, umax=None, nu=None, nmin=3, alpha=0.05,
            plotflag=False):
    '''
    Return Mean Residual Life, i.e., mean excesses vs thresholds

    Parameters
    ----------
    data : array_like
        vector of data of length N.
    u :  array-like
        threshold values (default linspace(umin, umax, nu))
    umin, umax : real scalars
        Minimum and maximum threshold, respectively
        (default min(data), max(data)).
    nu : scalar integer
        number of threshold values (default min(N-nmin,100))
    nmin : scalar integer
        Minimum number of extremes to include. (Default 3).
    alpha : real scalar
        Confidence coefficient (default 0.05)
    plotflag: bool

    Returns
    -------
    mrl : PlotData object
        Mean residual life values, i.e., mean excesses over thresholds, u.

    Notes
    -----
    RESLIFE estimates mean excesses over thresholds. The purpose of the MRL
    is to determine the threshold where the upper tail of the data can be
    approximated with the generalized Pareto distribution (GPD). The GPD is
    appropriate for the tail if the MRL is a linear function of the
    threshold, u. Theoretically, in the GPD model

        E(X-u0|X>u0) = s0/(1+k)
        E(X-u |X>u)  = s/(1+k) = (s0 - k*u)/(1+k)   for u > u0

    where k and s are the shape and scale parameters, respectively, and
    s0 is the scale parameter for the threshold u0 < u.

    Example
    -------
    >>> import wafo
    >>> R = wafo.stats.genpareto.rvs(0.1,2,2,size=100)
    >>> mrl = reslife(R,nu=20)
    >>> h = mrl.plot()

    See also
    ---------
    genpareto
    fitgenparrange, disprsnidx
    '''
    if u is None:
        sd = np.sort(data)
        n = len(data)

        nmin = max(nmin, 0)
        if 2 * nmin > n:
            warnings.warn('nmin possibly too large!')

        sdmax, sdmin = sd[-nmin], sd[0]
        umax = sdmax if umax is None else min(umax, sdmax)
        umin = sdmin if umin is None else max(umin, sdmin)

        if nu is None:
            nu = min(n - nmin, 100)

        u = linspace(umin, umax, nu)

    nu = len(u)

    #mrl1 = valarray(nu)
    #srl = valarray(nu)
    #num = valarray(nu)

    def mean_and_std(data1):
        return data1.mean(), data1.std(), data1.size

    dat = arr(data)
    tmp = arr([mean_and_std(dat[dat > tresh] - tresh) for tresh in u.tolist()])

    mrl, srl, num = tmp.T
    p = 1 - alpha
    alpha2 = alpha / 2

    # Approximate P% confidence interval
    # Za = -invnorm(alpha2)  # known mean
    Za = -_invt(alpha2, num - 1)  # unknown mean
    mrlu = mrl + Za * srl / sqrt(num)
    mrll = mrl - Za * srl / sqrt(num)

    titleTxt = 'Mean residual life with %d%s CI' % (100 * p, '%')
    res = PlotData(mrl, u, xlab='Threshold',
                   ylab='Mean Excess', title=titleTxt)
    res.workspace = dict(
        numdata=num, umin=umin, umax=umax, nu=nu, nmin=nmin, alpha=alpha)
    res.children = [
        PlotData(vstack([mrll, mrlu]).T, u, xlab='Threshold', title=titleTxt)]
    res.plot_args_children = [':r']
    if plotflag:
        res.plot()
    return res
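
Stripped of the confidence band, the estimator is just "mean of the excesses above each threshold". A compact sketch; exponential data make a good test because memorylessness keeps the MRL flat at the scale parameter:

import numpy as np

rng = np.random.default_rng(0)
data = rng.exponential(2.0, 500)
u = np.linspace(0, np.quantile(data, 0.9), 50)
mrl = np.array([(data[data > t] - t).mean() for t in u])
# mrl hovers near 2.0 at every threshold, as memorylessness predicts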
Example #13
def cav76pdf(t=None, h=None, mom=None, g=None):
    """
    CAV76PDF Cavanie et al. (1976) approximation of the density  (Tc,Ac)
             in a stationary Gaussian transform process X(t) where
             Y(t) = g(X(t)) (Y zero-mean Gaussian, X  non-Gaussian).

     CALL:  f = cav76pdf(t,h,[m0,m2,m4],g);

            f    = density of wave characteristics of half-wavelength
                   in a stationary Gaussian transformed process X(t),
                   where Y(t) = g(X(t)) (Y zero-mean Gaussian)
           t,h   = vectors of periods and amplitudes, respectively.
                   default depending on the spectral moments
     m0,m2,m4    = the 0'th, 2'nd and 4'th moment of the spectral density
                   with angular frequency.
            g    = space transformation, Y(t)=g(X(t)), default: g is identity
                   transformation, i.e. X(t) = Y(t)  is Gaussian,
                   The transformation, g, can be estimated using lc2tr
                   or dat2tr or given a priori by ochi.
           []    = default values are used.

     Examples
     --------
    >>> import wafo.spectrum.models as sm
    >>> Sj = sm.Jonswap()
    >>> w = np.linspace(0,4,256)
    >>> S = Sj.tospecdata(w)   #Make spectrum object from numerical values
    >>> S = sm.SpecData1D(Sj(w),w) # Alternatively do it manually
    >>> mom, mom_txt = S.moment(nr=4, even=True)
    >>> f = cav76pdf(mom=mom)
    >>> h = f.plot()

     See also
     --------
     lh83pdf, lc2tr, dat2tr

     References
     ----------
     Cavanie, A., Arhan, M. and Ezraty, R. (1976)
     "A statistical relationship between individual heights and periods of
      storm waves".
     In Proceedings Conference on Behaviour of Offshore Structures,
     Trondheim, pp. 354--360
     Norwegian Institute of Technology, Trondheim, Norway

     Lindgren, G. and Rychlik, I. (1982)
     Wave Characteristics Distributions for Gaussian Waves --
     Wave-length, Amplitude and Steepness, Ocean Engng vol 9, pp. 411-432.
    """
    # tested on: matlab 5.3 NB! note
    # History:
    # revised pab 04.11.2000
    # - fixed xlabels i.e. f.labx={'Tc','Ac'}
    # revised by IR 4 X 2000. fixed transform and normalisation
    # using Lindgren & Rychlik (1982) paper.
    # At the end of the function there is a text with derivation of the density.
    #
    # revised by jr 21.02.2000
    # - Introduced cell array for f.x for use with pdfplot
    # by pab 28.09.1999

    m0, m2, m4 = mom
    h, t, g = _set_default_t_h_g(t, h, g, m0, m2)

    eps4 = 1.0 - m2**2 / (m0 * m4)
    alfa = m2 / sqrt(m0 * m4)
    if np.any(~np.isreal(eps4)):
        raise ValueError('input moments are not correct')

    a = len(h)
    b = len(t)
    der = np.ones((a, 1))

    h_lh = g.dat2gauss(h.ravel(), der.ravel())
    der = abs(h_lh[1])
    h_lh = h_lh[0]

    # Normalization + transformation of t and h

    pos = 2 / (1 + alfa)  # inverse of a fraction of positive maxima
    cons = 2 * pi**4 * pos / sqrt(2 * pi) / m4 / sqrt((1 - alfa**2))
    # Tm=2*pi*sqrt(m0/m2)/alpha; #mean period between positive maxima

    t_lh = t
    h_lh = sqrt(m0) * h_lh

    # Computation of the distribution
    T, H = np.meshgrid(t_lh[1:b], h_lh)
    f_th = np.zeros((a, b))

    f_th[:, 1:b] = cons * der[:, None] * (H**2 / (T**5)) * np.exp(
        -0.5 * (H / T**2)**2. * ((T**2 - pi**2 * m2 / m4)**2 /
                                 (m0 * (1 - alfa**2)) + pi**4 / m4))
    f = PlotData(f_th, (t, h),
                 xlab='Tc',
                 ylab='Ac',
                 title='Joint density of (Tc,Ac) - Cavanie et al. (1976)',
                 plot_kwds=dict(plotflag=1))
    return _add_contour_levels(f)
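
The returned PlotData carries the grid as f.args == (t, h) and the density as a (len(h), len(t)) array in f.data, so it can also be contoured directly with matplotlib instead of going through f.plot(). A usage sketch (attribute names as used above; f.labels follows the AxisLabels layout seen in the test examples):

import matplotlib.pyplot as plt

t, h = f.args
cs = plt.contour(t, h, f.data)    # matplotlib takes (x, y, z) in this shape
plt.xlabel(f.labels.xlab)
plt.ylabel(f.labels.ylab)
plt.show()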
Example #14
def lh83pdf(t=None, h=None, mom=None, g=None):
    """
    LH83PDF Longuet-Higgins (1983) approximation of the density (Tc,Ac)
             in a stationary Gaussian transform process X(t) where
             Y(t) = g(X(t)) (Y zero-mean Gaussian, X  non-Gaussian).

      CALL:  f   = lh83pdf(t,h,[m0,m1,m2],g);

            f    = density of wave characteristics of half-wavelength
                   in a stationary Gaussian transformed process X(t),
                   where Y(t) = g(X(t)) (Y zero-mean Gaussian)
           t,h   = vectors of periods and amplitudes, respectively.
                   default depending on the spectral moments
        m0,m1,m2 = the 0'th,1'st and 2'nd moment of the spectral density
                   with angular  frequency.
            g    = space transformation, Y(t)=g(X(t)), default: g is identity
                   transformation, i.e. X(t) = Y(t)  is Gaussian,
                   The transformation, g, can be estimated using lc2tr
                   or dat2tr or given a priori by ochi.

    Examples
    --------
    >>> import wafo.spectrum.models as sm
    >>> Sj = sm.Jonswap()
    >>> w = np.linspace(0,4,256)
    >>> S = Sj.tospecdata(w)   #Make spectrum object from numerical values
    >>> S = sm.SpecData1D(Sj(w),w) # Alternatively do it manually
    >>> mom, mom_txt = S.moment(nr=2, even=False)
    >>> f = lh83pdf(mom=mom)
    >>> h = f.plot()

    See also
    --------
    cav76pdf,  lc2tr, dat2tr

    References
    ----------
    Longuet-Higgins, M.S. (1983)
    "On the joint distribution of wave periods and amplitudes in a
     random wave field", Proc. R. Soc. A389, pp 241--258

    Longuet-Higgins, M.S. (1975)
    "On the joint distribution of the periods and amplitudes of sea waves",
    J. Geophys. Res. 80, pp 2688--2694
    """

    # tested on: matlab 5.3
    # History:
    # Revised pab 01.04.2001
    # - Added example
    # - Better automatic scaling for h,t
    # revised by IR 18.06.2000, fixing transformation and transposing t and h to fit simpson req.
    # revised by pab 28.09.1999
    #   made more efficient calculation of f
    # by Igor Rychlik

    m0, m1, m2 = mom
    h, t, g = _set_default_t_h_g(t, h, g, m0, m2)

    L0 = m0
    L1 = m1 / (2 * pi)
    L2 = m2 / (2 * pi)**2
    eps2 = sqrt((L2 * L0) / (L1**2) - 1)

    if np.any(~np.isreal(eps2)):
        raise ValueError('input moments are not correct')

    const = 4 / sqrt(pi) / eps2 / (1 + 1 / sqrt(1 + eps2**2))

    a = len(h)
    b = len(t)
    der = np.ones((a, 1))

    h_lh = g.dat2gauss(h.ravel(), der.ravel())

    der = abs(h_lh[1])  # abs(h_lh[:, 1])
    h_lh = h_lh[0]

    # Normalization + transformation of t and h ???????
    # Without any transformation

    t_lh = t / (L0 / L1)
    # h_lh = h_lh/sqrt(2*L0)
    h_lh = h_lh / sqrt(2)
    t_lh = 2 * t_lh

    # Computation of the distribution
    T, H = np.meshgrid(t_lh[1:b], h_lh)
    f_th = np.zeros((a, b))
    tmp = const * der[:, None] * (H / T)**2 * np.exp(-H**2. * (1 + (
        (1 - 1. / T) / eps2)**2)) / ((L0 / L1) * sqrt(2) / 2)
    f_th[:, 1:b] = tmp

    f = PlotData(f_th, (t, h),
                 xlab='Tc',
                 ylab='Ac',
                 title='Joint density of (Tc,Ac) - Longuet-Higgins (1983)',
                 plot_kwds=dict(plotflag=1))

    return _add_contour_levels(f)