def edf(x, method=2):
    """
    Returns Empirical Distribution Function (EDF).

    Parameters
    ----------
    x : array-like
        data vector
    method : integer scalar
        1. Interpolation so that F(X_(k)) == (k-0.5)/n.
        2. Interpolation so that F(X_(k)) == k/(n+1).    (default)
        3. The empirical distribution. F(X_(k)) = k/n

    Examples
    --------
    >>> import wafo.stats as ws
    >>> x = np.linspace(0,6,200)
    >>> R = ws.rayleigh.rvs(scale=2,size=100)
    >>> F = ws.edf(R)
    >>> h = F.plot()

    See also
    --------
    pdfplot, cumtrapz
    """
    z = atleast_1d(x)
    z.sort()

    n = len(z)
    if method == 1:
        e_cdf = arange(0.5, n) / n
    elif method == 3:
        e_cdf = arange(1, n + 1) / n
    else:  # method == 2 (default)
        e_cdf = arange(1, n + 1) / (n + 1)

    return PlotData(e_cdf, z, xlab='x', ylab='F(x)', plotmethod='step')
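# A minimal, self-contained sketch (assuming only numpy) of the three
# plotting-position rules used by edf above; useful for checking the
# boundary behaviour of each method without constructing a PlotData object.
import numpy as np

def _edf_values(x, method=2):
    """Return sorted data and EDF values for the given plotting rule."""
    z = np.sort(np.atleast_1d(x))
    n = len(z)
    if method == 1:
        return z, np.arange(0.5, n) / n          # F(X_(k)) = (k-0.5)/n
    if method == 3:
        return z, np.arange(1, n + 1) / n        # F(X_(k)) = k/n
    return z, np.arange(1, n + 1) / (n + 1)      # F(X_(k)) = k/(n+1)

# For n=4 samples the three rules give different end points:
# method 1 -> [0.125, 0.375, 0.625, 0.875], method 3 reaches 1.0 exactly,
# method 2 (the default) stays strictly inside (0, 1).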
def prb_search_best(self, prb_e=None, hsvec=None, hsfun='hste',
                    alpha=0.05, color='r', label=''):
    """Return best smoothed binomial probability.

    Parameters
    ----------
    prb_e : PlotData object with empirical binomial probabilities
    hsvec : arraylike  (default np.linspace(hsmax*0.2, hsmax, 55))
        vector of smoothing parameters
    hsfun :
        method for calculating hsmax
    """
    if prb_e is None:
        prb_e = self.prb_empirical(alpha=alpha, color=color)
    if hsvec is None:
        hsmax = max(self._get_max_smoothing(hsfun)[0], self.hs_e)
        hsvec = np.linspace(hsmax * 0.2, hsmax, 55)

    hs_best = hsvec[-1] + 0.1
    prb_best = self.prb_smoothed(prb_e, hs_best, alpha, color, label)
    aicc = np.zeros(np.size(hsvec))
    for i, hi in enumerate(hsvec):
        f = self.prb_smoothed(prb_e, hi, alpha, color, label)
        aicc[i] = f.aicc
        if f.aicc <= prb_best.aicc:
            prb_best = f
            hs_best = hi
    prb_best.score = PlotData(aicc, hsvec)
    prb_best.hs = hs_best
    self._set_smoothing(hs_best)
    return prb_best
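# The search above is a plain grid scan that keeps the fit with the
# smallest AICc. The same pattern, stripped of the wafo objects, with a
# hypothetical scalar score function (illustration only):
import numpy as np

def argmin_on_grid(score, grid):
    """Return (best_point, all_scores) for a 1-D grid search."""
    scores = np.array([score(g) for g in grid])
    return grid[int(scores.argmin())], scores

# e.g. argmin_on_grid(lambda h: (h - 0.3) ** 2, np.linspace(0.1, 1.0, 55))
# picks the grid point nearest 0.3.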
def __init__(self, *args, **kwds):
    options = dict(title='Transform',
                   xlab='x', ylab='g(x)',
                   plot_args=['r'],
                   plot_args_children=['g--'])
    options.update(**kwds)
    super(TrData, self).__init__(*args, **options)
    self.ymean = kwds.get('ymean', 0e0)
    self.ysigma = kwds.get('ysigma', 1e0)
    self.mean = kwds.get('mean')
    self.sigma = kwds.get('sigma')

    if self.mean is None:
        # self.mean = np.mean(self.args)
        self.mean = self.gauss2dat(self.ymean)
    if self.sigma is None:
        yp = self.ymean + self.ysigma
        ym = self.ymean - self.ysigma
        self.sigma = (self.gauss2dat(yp) - self.gauss2dat(ym)) / 2.

    self.children = [PlotData((self.args - self.mean) / self.sigma,
                              self.args)]
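# When mean/sigma are not given, TrData derives them from the transform:
# mean = gauss2dat(ymean) and
# sigma = (gauss2dat(ymean + ysigma) - gauss2dat(ymean - ysigma)) / 2,
# i.e. a symmetric finite difference of the inverse transform around the
# Gaussian mean. A sketch with a stand-in gauss2dat (an affine map, so
# the recovered sigma is exact; the real transform is nonlinear):
def _demo_trdata_moments(ymean=0.0, ysigma=1.0):
    gauss2dat = lambda y: 2.0 * y + 3.0   # hypothetical inverse transform
    mean = gauss2dat(ymean)
    sigma = (gauss2dat(ymean + ysigma) - gauss2dat(ymean - ysigma)) / 2.
    return mean, sigma                     # -> (3.0, 2.0)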
def prb_smoothed(self, prb_e, hs, alpha=0.05, color='r', label=''):
    """Return smoothed binomial probability.

    Parameters
    ----------
    prb_e : PlotData object with empirical binomial probabilities
    hs : smoothing parameter
    alpha : confidence level
    color : color of plot object
    label : label for plot object
    """
    x_e = prb_e.args
    n_e = len(x_e)
    dx_e = x_e[1] - x_e[0]
    n = self.x.size

    x_s = np.linspace(x_e[0], x_e[-1], 10 * n_e + 1)
    self.hs = hs

    prb_s = self.kreg(x_s, output='plotobj', title='',
                      plot_kwds=dict(color=color, linewidth=2))

    m_nan = np.isnan(prb_s.data)
    if m_nan.any():  # assume 0/0 division
        prb_s.data[m_nan] = 0.0

    # expected number of data in each bin
    c_s = self.kreg.tkde.eval_grid_fast(x_s) * dx_e * n
    plo, pup = self.prb_ci(c_s, prb_s.data, alpha)

    prb_s.dataCI = np.vstack((plo, pup)).T
    prb_s.prediction_error_avg = (np.trapz(pup - plo, x_s) /
                                  (x_s[-1] - x_s[0]))

    if label:
        prb_s.plot_kwds['label'] = label
    prb_s.children = [PlotData([plo, pup], x_s,
                               plotmethod='fill_between',
                               plot_kwds=dict(alpha=0.2, color=color)),
                      prb_e]

    p_e = prb_e.eval_points(x_s)
    p_s = prb_s.data
    dp_s = np.sign(np.diff(p_s))
    k = (dp_s[:-1] != dp_s[1:]).sum()  # number of peaks

    sigmai = _logit(pup) - _logit(plo) + _EPS
    aicc = ((((_logit(p_e) - _logit(p_s)) / sigmai) ** 2).sum() +
            2 * k * (k + 1) / np.maximum(n_e - k + 1, 1) +
            np.abs((p_e - pup).clip(min=0) -
                   (p_e - plo).clip(max=0)).sum())
    prb_s.aicc = aicc
    return prb_s
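# prb_smoothed scores each fit with a corrected AIC on the logit scale:
# a weighted squared logit error, a complexity penalty 2*k*(k+1)/(n_e-k+1)
# where k counts sign changes (peaks) of the smoothed curve, and a penalty
# for empirical points falling outside the CI band. A minimal sketch of
# the _logit helper assumed above (wafo's own _logit may guard against
# 0 and 1 differently):
import numpy as np

def _logit_sketch(p, eps=1e-12):
    """log(p / (1 - p)), clipped away from the boundaries."""
    p = np.clip(p, eps, 1 - eps)  # avoid log(0) at p = 0 or 1
    return np.log(p) - np.log1p(-p)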
def prb_empirical(self, xi=None, hs_e=None, alpha=0.05, color='r', **kwds):
    """Returns empirical binomial probability.

    Uses self.x (position vector) and self.y (binomial response variable
    of zeros and ones).

    Parameters
    ----------
    xi : ndarray
        position vector (default self.get_grid(hs_e))
    hs_e : scalar
        smoothing parameter used for the default grid
    alpha : scalar
        confidence level
    color : used in plot

    Returns
    -------
    prb_e : PlotData object
        empirical probability P(x)
    """
    if xi is None:
        xi = self.get_grid(hs_e)

    x = self.x
    y = self.y

    c = gridcount(x, xi)  # count data
    if np.any(y == 1):
        c0 = gridcount(x[y == 1], xi)  # count successes
    else:
        c0 = np.zeros(np.shape(xi))
    prb = np.where(c == 0, 0, c0 / (c + _TINY))  # assume prb == 0 for c == 0
    CI = np.vstack(self.prb_ci(c, prb, alpha))

    prb_e = PlotData(prb, xi, plotmethod='plot', plot_args=['.'],
                     plot_kwds=dict(markersize=6, color=color, picker=5))
    prb_e.dataCI = CI.T
    prb_e.count = c
    return prb_e
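# prb_ci is not shown in this excerpt; it returns a pointwise binomial
# confidence band from the counts c and proportions prb. A minimal sketch
# using the normal approximation (the wafo implementation may use a
# different, e.g. continuity-corrected, interval):
import numpy as np
from scipy import special

def prb_ci_sketch(c, prb, alpha=0.05):
    """Normal-approximation CI for binomial proportions prb with c trials."""
    za = -special.ndtri(alpha / 2)  # two-sided standard normal quantile
    se = np.sqrt(prb * (1 - prb) / np.maximum(c, 1))
    return np.clip(prb - za * se, 0, 1), np.clip(prb + za * se, 0, 1)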
def _make_object(self, f, **kwds):
    titlestr = 'Kernel density estimate ({})'.format(self.kernel.name)
    kwds2 = dict(title=titlestr)
    kwds2['plot_kwds'] = dict(plotflag=1)
    kwds2.update(**kwds)
    args = self.args
    if self.d == 1:
        args = args[0]
    wdata = PlotData(f, args, **kwds2)
    if self.d > 1:
        self._add_contour_levels(wdata)
    return wdata
class TestPlotData(unittest.TestCase):

    def setUp(self):
        x = np.linspace(0, np.pi, 5)
        self.d2 = PlotData(np.sin(x), x,
                           xlab='x', ylab='sin', title='sinus',
                           plot_args=['r.'])
        self.x = x

    def tearDown(self):
        pass

    def test_copy(self):
        d3 = self.d2.copy()  # shallow copy
        self.d2.args = None
        assert_array_almost_equal(d3.args, self.x)

    def test_labels_str(self):
        txt = str(self.d2.labels)
        self.assertEqual(txt,
                         'AxisLabels(title=sinus, xlab=x, ylab=sin, zlab=)')
def tocovdata(self, timeseries):
    """
    Return auto covariance function from data.

    Returns
    -------
    acf : CovData1D object
        with attributes:
        data : ACF vector, length L+1
        args : time lags, length L+1
        sigma : estimated large-lag standard deviation of the estimate,
            assuming x is a Gaussian process. If acf[k]=0 for all lags
            k>q, then an approximation of the variance for large samples
            due to Bartlett is
                var(acf[k]) = 1/N*(acf[0]**2 + 2*acf[1]**2
                                   + 2*acf[2]**2 + .. + 2*acf[q]**2)
            for k>q and where N=length(x). A special case is white
            noise, where it equals acf[0]**2/N for k>0.
        norm : bool
            If False, indicates that auto_cov is not normalized.

    Example
    -------
    >>> import wafo.data
    >>> import wafo.objects as wo
    >>> x = wafo.data.sea()
    >>> ts = wo.mat2timeseries(x)
    >>> acf = ts.tocovdata(150)

    h = acf.plot()
    """
    lag = self.lag
    window = self.window
    detrend = self.detrend

    try:
        x = timeseries.data.flatten('F')
        dt = timeseries.sampling_period()
    except Exception:
        x = timeseries[:, 1:].flatten('F')
        dt = sampling_period(timeseries[:, 0])
    if self.dt is not None:
        dt = self.dt

    if self.tr is not None:
        x = self.tr.dat2gauss(x)

    n = len(x)
    indnan = np.isnan(x)
    if indnan.any():
        x = x - x[~indnan].mean()
        Ncens = n - indnan.sum()
        x[indnan] = 0.
    else:
        Ncens = n
        x = x - x.mean()
    if hasattr(detrend, '__call__'):
        x = detrend(x)

    nfft = 2 ** nextpow2(n)
    raw_periodogram = abs(fft(x, nfft)) ** 2 / Ncens
    # ifft = fft/nfft since raw_periodogram is real!
    auto_cov = np.real(fft(raw_periodogram)) / nfft

    if self.flag.startswith('unbiased'):
        # unbiased result, i.e. divide by n-abs(lag)
        auto_cov = auto_cov[:Ncens] * Ncens / np.arange(Ncens, 1, -1)

    if self.norm:
        auto_cov = auto_cov / auto_cov[0]

    if lag is None:
        lag = self._estimate_lag(auto_cov, Ncens)
    lag = min(lag, n - 2)
    if isinstance(window, str) or type(window) is tuple:
        win = get_window(window, 2 * lag - 1)
    else:
        win = np.asarray(window)
    auto_cov[:lag] = auto_cov[:lag] * win[lag - 1::]
    auto_cov[lag] = 0
    lags = slice(0, lag + 1)
    t = np.linspace(0, lag * dt, lag + 1)
    acf = CovData1D(auto_cov[lags], t)
    acf.sigma = np.sqrt(np.r_[0, auto_cov[0] ** 2,
                              auto_cov[0] ** 2 +
                              2 * np.cumsum(auto_cov[1:] ** 2)] / Ncens)
    acf.children = [PlotData(-2. * acf.sigma[lags], t),
                    PlotData(2. * acf.sigma[lags], t)]
    acf.plot_args_children = ['r:']
    acf.norm = self.norm
    return acf
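# The core of tocovdata is the Wiener-Khinchin route: the inverse FFT of
# the raw periodogram equals the (biased) sample autocovariance. A
# minimal, self-contained sketch of that identity, without the windowing,
# censoring, and bias correction done above:
import numpy as np

def acf_via_fft(x, nlags):
    """Biased sample autocovariance of x for lags 0..nlags via FFT."""
    x = np.asarray(x, dtype=float)
    x = x - x.mean()
    n = len(x)
    nfft = int(2 ** np.ceil(np.log2(2 * n)))  # pad to avoid circular wrap
    raw_periodogram = np.abs(np.fft.fft(x, nfft)) ** 2 / n
    return np.real(np.fft.ifft(raw_periodogram))[:nlags + 1]

# Sanity check against the direct definition at lag 1:
# x0 = x - x.mean()
# np.allclose(acf_via_fft(x, 1)[1], (x0[:-1] * x0[1:]).sum() / len(x))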
def dispersion_idx(data, t=None, u=None, umin=None, umax=None, nu=None,
                   nmin=10, tb=1, alpha=0.05, plotflag=False):
    '''Return Dispersion Index vs threshold

    Parameters
    ----------
    data, t : array_like
        data values and sampled times, respectively.
    u : array-like
        threshold values (default linspace(umin, umax, nu))
    umin, umax : real scalars
        Minimum and maximum threshold, respectively
        (default min(data), max(data)).
    nu : scalar integer
        number of threshold values (default min(N-nmin, 100))
    nmin : scalar integer
        Minimum number of extremes to include. (Default 10).
    tb : real scalar
        Block period (same unit as the sampled times) (default 1)
    alpha : real scalar
        Confidence coefficient (default 0.05)
    plotflag : bool

    Returns
    -------
    DI : PlotData object
        Dispersion index
    b_u : real scalar
        threshold where the number of exceedances in a fixed period (Tb)
        is consistent with a Poisson process.
    ok_u : array-like
        all thresholds where the number of exceedances in a fixed period
        (Tb) is consistent with a Poisson process.

    Notes
    -----
    DISPERSION_IDX estimates the Dispersion Index (DI) as a function of
    threshold. DI measures the homogeneity of data, and the purpose of DI
    is to determine the threshold where the number of exceedances in a
    fixed period (Tb) is consistent with a Poisson process. For a Poisson
    process the DI is one. Thus the threshold should be so high that DI
    is not significantly different from 1.

    The Poisson hypothesis is not rejected if the estimated DI is between

        chi2(alpha/2, M-1)/(M-1) < DI < chi2(1-alpha/2, M-1)/(M-1)

    where M is the total number of fixed periods/blocks - generally the
    total number of years in the sample.

    Example
    -------
    >>> import wafo.data
    >>> xn = wafo.data.sea()
    >>> t, data = xn.T
    >>> Ie = findpot(data, t, 0, 5)
    >>> di, u, ok_u = dispersion_idx(data[Ie], t[Ie], tb=100)
    >>> h = di.plot()  # a threshold around 1 seems appropriate.
    >>> round(u*100)/100
    1.03

    vline(u)

    See also
    --------
    reslife, fitgenparrange, extremal_idx

    References
    ----------
    Ribatet, M. A. (2006),
    A User's Guide to the POT Package (Version 1.0), August 2006.
    http://cran.r-project.org/

    Cunnane, C. (1979),
    Note on the Poisson assumption in partial duration series models.
    Water Resources Research, 15(2), 489-494.
    '''
    n = len(data)
    if t is None:
        ti = arange(n)
    else:
        ti = arr(t) - min(t)

    t1 = np.empty(ti.shape, dtype=int)
    t1[:] = np.floor(ti / tb)

    if u is None:
        sd = np.sort(data)

        nmin = max(nmin, 0)
        if 2 * nmin > n:
            warnings.warn('nmin possibly too large!')

        sdmax, sdmin = sd[-nmin], sd[0]
        umax = sdmax if umax is None else min(umax, sdmax)
        umin = sdmin if umin is None else max(umin, sdmin)

        if nu is None:
            nu = min(n - nmin, 100)
        u = linspace(umin, umax, nu)

    nu = len(u)

    di = np.zeros(nu)
    d = arr(data)

    mint = int(min(t1))  # mint should be 0.
    maxt = int(max(t1))
    M = maxt - mint + 1
    occ = np.zeros(M)

    for ix, tresh in enumerate(u.tolist()):
        excess = (d > tresh)
        lambda_ = excess.sum() / M
        for block in range(M):
            occ[block] = sum(excess[t1 == block])

        di[ix] = occ.var() / lambda_

    p = 1 - alpha

    diLo = _invchi2(1 - alpha / 2, M - 1) / (M - 1)
    diUp = _invchi2(alpha / 2, M - 1) / (M - 1)

    # Find appropriate threshold
    k1, = np.where((diLo < di) & (di < diUp))
    if len(k1) > 0:
        ok_u = u[k1]
        b_di = (di[k1].mean() < di[k1])
        k = b_di.argmax()
        b_u = ok_u[k]
    else:
        b_u = ok_u = None

    CItxt = '%d%s CI' % (100 * p, '%')
    titleTxt = 'Dispersion Index plot'

    res = PlotData(di, u, title=titleTxt,
                   xlab='Threshold', ylab='Dispersion Index')
    res.workspace = dict(umin=umin, umax=umax, nu=nu, nmin=nmin,
                         alpha=alpha)
    res.children = [PlotData(vstack([diLo * ones(nu), diUp * ones(nu)]).T,
                             u, xlab='Threshold', title=CItxt)]
    res.plot_args_children = ['--r']
    if plotflag:
        res.plot(di)
    return res, b_u, ok_u
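# Quick synthetic check of the dispersion index idea: for Poisson counts
# the variance-to-mean ratio is close to one, while clustered exceedances
# push it above one. A self-contained sketch using only numpy,
# independent of the wafo helpers above:
import numpy as np

def dispersion_index(occ):
    """Variance-to-mean ratio of per-block exceedance counts."""
    occ = np.asarray(occ, dtype=float)
    return occ.var() / occ.mean()

rng = np.random.default_rng(0)
di_poisson = dispersion_index(rng.poisson(lam=5.0, size=1000))      # ~1.0
di_clustered = dispersion_index(5 * rng.poisson(lam=1.0, size=1000))  # ~5.0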
def reslife(data, u=None, umin=None, umax=None, nu=None, nmin=3,
            alpha=0.05, plotflag=False):
    '''
    Return Mean Residual Life, i.e., mean excesses vs thresholds

    Parameters
    ----------
    data : array_like
        vector of data of length N.
    u : array-like
        threshold values (default linspace(umin, umax, nu))
    umin, umax : real scalars
        Minimum and maximum threshold, respectively
        (default min(data), max(data)).
    nu : scalar integer
        number of threshold values (default min(N-nmin, 100))
    nmin : scalar integer
        Minimum number of extremes to include. (Default 3).
    alpha : real scalar
        Confidence coefficient (default 0.05)
    plotflag : bool

    Returns
    -------
    mrl : PlotData object
        Mean residual life values, i.e., mean excesses over thresholds, u.

    Notes
    -----
    RESLIFE estimates mean excesses over thresholds. The purpose of MRL
    is to determine the threshold where the upper tail of the data can be
    approximated with the generalized Pareto distribution (GPD). The GPD
    is appropriate for the tail if the MRL is a linear function of the
    threshold, u. Theoretically, in the GPD model,

        E(X-u0 | X>u0) = s0/(1+k)
        E(X-u  | X>u)  = s/(1+k) = (s0 - k*u)/(1+k)   for u > u0

    where k, s are the shape and scale parameters, respectively, and
    s0 is the scale parameter for threshold u0 < u.

    Example
    -------
    >>> import wafo
    >>> R = wafo.stats.genpareto.rvs(0.1, 2, 2, size=100)
    >>> mrl = reslife(R, nu=20)
    >>> h = mrl.plot()

    See also
    --------
    genpareto
    fitgenparrange, disprsnidx
    '''
    if u is None:
        sd = np.sort(data)
        n = len(data)

        nmin = max(nmin, 0)
        if 2 * nmin > n:
            warnings.warn('nmin possibly too large!')

        sdmax, sdmin = sd[-nmin], sd[0]
        umax = sdmax if umax is None else min(umax, sdmax)
        umin = sdmin if umin is None else max(umin, sdmin)

        if nu is None:
            nu = min(n - nmin, 100)
        u = linspace(umin, umax, nu)

    nu = len(u)

    def mean_and_std(data1):
        return data1.mean(), data1.std(), data1.size

    dat = arr(data)
    tmp = arr([mean_and_std(dat[dat > tresh] - tresh)
               for tresh in u.tolist()])

    mrl, srl, num = tmp.T
    p = 1 - alpha
    alpha2 = alpha / 2

    # Approximate P% confidence interval
    # Za = -invnorm(alpha2)       # known mean
    Za = -_invt(alpha2, num - 1)  # unknown mean
    mrlu = mrl + Za * srl / sqrt(num)
    mrll = mrl - Za * srl / sqrt(num)

    titleTxt = 'Mean residual life with %d%s CI' % (100 * p, '%')
    res = PlotData(mrl, u, xlab='Threshold',
                   ylab='Mean Excess', title=titleTxt)
    res.workspace = dict(numdata=num, umin=umin, umax=umax, nu=nu,
                         nmin=nmin, alpha=alpha)
    res.children = [PlotData(vstack([mrll, mrlu]).T, u,
                             xlab='Threshold', title=titleTxt)]
    res.plot_args_children = [':r']
    if plotflag:
        res.plot()
    return res
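# The MRL diagnostic rests on E(X-u | X>u) being linear in u for a GPD
# tail. A compact numeric check of that linearity with scipy's genpareto
# (note scipy's shape c has the opposite sign convention to the k used in
# the docstring above; with scipy's c the mean excess is
# (scale + c*u)/(1 - c), i.e. slope c/(1-c)):
import numpy as np
from scipy.stats import genpareto

def mean_excess(data, thresholds):
    """Empirical mean excess over each threshold."""
    data = np.asarray(data)
    return np.array([(data[data > u] - u).mean() for u in thresholds])

r = genpareto.rvs(c=0.1, scale=2.0, size=20000, random_state=0)
u = np.linspace(0, np.quantile(r, 0.9), 10)
me = mean_excess(r, u)  # approximately linear in u, slope ~ 0.1/0.9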
def cav76pdf(t=None, h=None, mom=None, g=None):
    """
    CAV76PDF Cavanie et al. (1976) approximation of the density (Tc,Ac)
    in a stationary Gaussian transform process X(t), where
    Y(t) = g(X(t)) (Y zero-mean Gaussian, X non-Gaussian).

    CALL:  f = cav76pdf(t, h, [m0, m2, m4], g)

        f    = density of wave characteristics of half-wavelength in a
               stationary Gaussian transformed process X(t), where
               Y(t) = g(X(t)) (Y zero-mean Gaussian)
        t, h = vectors of periods and amplitudes, respectively.
               Defaults depend on the spectral moments.
        m0, m2, m4 = the 0th, 2nd and 4th moments of the spectral
               density with angular frequency.
        g    = space transformation, Y(t) = g(X(t)). Default: g is the
               identity transformation, i.e. X(t) = Y(t) is Gaussian.
               The transformation, g, can be estimated using lc2tr or
               dat2tr, or given a priori by ochi.

    Examples
    --------
    >>> import wafo.spectrum.models as sm
    >>> Sj = sm.Jonswap()
    >>> w = np.linspace(0,4,256)
    >>> S = Sj.tospecdata(w)  # Make spectrum object from numerical values
    >>> S = sm.SpecData1D(Sj(w), w)  # Alternatively do it manually
    >>> mom, mom_txt = S.moment(nr=4, even=True)
    >>> f = cav76pdf(mom=mom)
    >>> f.plot()

    See also
    --------
    lh83pdf, lc2tr, dat2tr

    References
    ----------
    Cavanie, A., Arhan, M. and Ezraty, R. (1976)
    "A statistical relationship between individual heights and periods
    of storm waves".
    In Proceedings Conference on Behaviour of Offshore Structures,
    Trondheim, pp. 354-360.
    Norwegian Institute of Technology, Trondheim, Norway.

    Lindgren, G. and Rychlik, I. (1982)
    "Wave Characteristic Distributions for Gaussian Waves --
    Wave-length, Amplitude and Steepness",
    Ocean Engng, Vol 9, pp. 411-432.
    """
    # tested on: matlab 5.3 NB! note
    # History:
    # revised pab 04.11.2000
    # - fixed xlabels i.e. f.labx={'Tc','Ac'}
    # revised by IR 4 X 2000. fixed transform and normalisation
    # using Lindgren & Rychlik (1982) paper.
    # At the end of the function there is a text with derivation of the
    # density.
    #
    # revised by jr 21.02.2000
    # - Introduced cell array for f.x for use with pdfplot
    # by pab 28.09.1999

    m0, m2, m4 = mom
    h, t, g = _set_default_t_h_g(t, h, g, m0, m2)

    eps4 = 1.0 - m2 ** 2 / (m0 * m4)
    alfa = m2 / sqrt(m0 * m4)
    if np.any(~np.isreal(eps4)):
        raise ValueError('input moments are not correct')

    a = len(h)
    b = len(t)
    der = np.ones((a, 1))

    h_lh = g.dat2gauss(h.ravel(), der.ravel())
    der = abs(h_lh[1])
    h_lh = h_lh[0]

    # Normalization + transformation of t and h
    pos = 2 / (1 + alfa)  # inverse of a fraction of positive maxima
    cons = 2 * pi ** 4 * pos / sqrt(2 * pi) / m4 / sqrt((1 - alfa ** 2))
    # Tm = 2*pi*sqrt(m0/m2)/alpha  # mean period between positive maxima

    t_lh = t
    h_lh = sqrt(m0) * h_lh

    # Computation of the distribution
    T, H = np.meshgrid(t_lh[1:b], h_lh)
    f_th = np.zeros((a, b))
    f_th[:, 1:b] = cons * der[:, None] * (H ** 2 / (T ** 5)) * np.exp(
        -0.5 * (H / T ** 2) ** 2. *
        ((T ** 2 - pi ** 2 * m2 / m4) ** 2 / (m0 * (1 - alfa ** 2)) +
         pi ** 4 / m4))

    f = PlotData(f_th, (t, h),
                 xlab='Tc', ylab='Ac',
                 title='Joint density of (Tc,Ac) - Cavanie et al. (1976)',
                 plot_kwds=dict(plotflag=1))

    return _add_contour_levels(f)
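# The Cavanie density is controlled by two spectral shape parameters:
# the narrowness alfa = m2/sqrt(m0*m4) and eps4 = 1 - alfa**2 (the code's
# eps4 holds the square of the usual spectral width parameter). A tiny
# sketch evaluating them for a flat spectrum on [0, wc], where the
# moments are m_n = wc**(n+1)/(n+1):
import numpy as np

def spectral_width(m0, m2, m4):
    alfa = m2 / np.sqrt(m0 * m4)
    return alfa, 1.0 - alfa ** 2

wc = 2.0
m0, m2, m4 = wc, wc ** 3 / 3, wc ** 5 / 5
alfa, eps4 = spectral_width(m0, m2, m4)  # alfa = sqrt(5)/3 ~ 0.745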
def lh83pdf(t=None, h=None, mom=None, g=None):
    """
    LH83PDF Longuet-Higgins (1983) approximation of the density (Tc,Ac)
    in a stationary Gaussian transform process X(t), where
    Y(t) = g(X(t)) (Y zero-mean Gaussian, X non-Gaussian).

    CALL:  f = lh83pdf(t, h, [m0, m1, m2], g)

        f    = density of wave characteristics of half-wavelength in a
               stationary Gaussian transformed process X(t), where
               Y(t) = g(X(t)) (Y zero-mean Gaussian)
        t, h = vectors of periods and amplitudes, respectively.
               Defaults depend on the spectral moments.
        m0, m1, m2 = the 0th, 1st and 2nd moments of the spectral
               density with angular frequency.
        g    = space transformation, Y(t) = g(X(t)). Default: g is the
               identity transformation, i.e. X(t) = Y(t) is Gaussian.
               The transformation, g, can be estimated using lc2tr or
               dat2tr, or given a priori by ochi.

    Examples
    --------
    >>> import wafo.spectrum.models as sm
    >>> Sj = sm.Jonswap()
    >>> w = np.linspace(0,4,256)
    >>> S = Sj.tospecdata(w)  # Make spectrum object from numerical values
    >>> S = sm.SpecData1D(Sj(w), w)  # Alternatively do it manually
    >>> mom, mom_txt = S.moment(nr=2, even=False)
    >>> f = lh83pdf(mom=mom)
    >>> f.plot()

    See also
    --------
    cav76pdf, lc2tr, dat2tr

    References
    ----------
    Longuet-Higgins, M.S. (1983)
    "On the joint distribution of wave periods and amplitudes in a
    random wave field",
    Proc. R. Soc. A389, pp. 241-258.

    Longuet-Higgins, M.S. (1975)
    "On the joint distribution of wave periods and amplitudes of sea
    waves",
    J. Geophys. Res. 80, pp. 2688-2694.
    """
    # tested on: matlab 5.3
    # History:
    # Revised pab 01.04.2001
    # - Added example
    # - Better automatic scaling for h, t
    # revised by IR 18.06.2000, fixing transformation and transposing
    # t and h to fit simpson req.
    # revised by pab 28.09.1999
    #   made more efficient calculation of f
    # by Igor Rychlik

    m0, m1, m2 = mom
    h, t, g = _set_default_t_h_g(t, h, g, m0, m2)

    L0 = m0
    L1 = m1 / (2 * pi)
    L2 = m2 / (2 * pi) ** 2
    eps2 = sqrt((L2 * L0) / (L1 ** 2) - 1)

    if np.any(~np.isreal(eps2)):
        raise ValueError('input moments are not correct')

    const = 4 / sqrt(pi) / eps2 / (1 + 1 / sqrt(1 + eps2 ** 2))

    a = len(h)
    b = len(t)
    der = np.ones((a, 1))

    h_lh = g.dat2gauss(h.ravel(), der.ravel())
    der = abs(h_lh[1])
    h_lh = h_lh[0]

    # Normalization + transformation of t and h
    t_lh = t / (L0 / L1)
    # h_lh = h_lh / sqrt(2 * L0)
    h_lh = h_lh / sqrt(2)
    t_lh = 2 * t_lh

    # Computation of the distribution
    T, H = np.meshgrid(t_lh[1:b], h_lh)
    f_th = np.zeros((a, b))
    tmp = const * der[:, None] * (H / T) ** 2 * np.exp(
        -H ** 2. * (1 + ((1 - 1. / T) / eps2) ** 2)) / \
        ((L0 / L1) * sqrt(2) / 2)
    f_th[:, 1:b] = tmp

    f = PlotData(f_th, (t, h),
                 xlab='Tc', ylab='Ac',
                 title='Joint density of (Tc,Ac) - Longuet-Higgins (1983)',
                 plot_kwds=dict(plotflag=1))

    return _add_contour_levels(f)
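# Longuet-Higgins' density is governed by the spectral width parameter
# eps2 = sqrt(L0*L2/L1**2 - 1), built from the time-normalized moments
# L_n = m_n/(2*pi)**n, which reduces to sqrt(m0*m2/m1**2 - 1). For a flat
# spectrum on [0, wc] this gives eps2 = sqrt(4/3 - 1) = 1/sqrt(3),
# independent of wc; a quick numeric check:
import numpy as np

wc = 2.0
m0, m1, m2 = wc, wc ** 2 / 2, wc ** 3 / 3
L0, L1, L2 = m0, m1 / (2 * np.pi), m2 / (2 * np.pi) ** 2
eps2 = np.sqrt(L0 * L2 / L1 ** 2 - 1)  # ~ 0.577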