def pick_duration(self, dest_id):
    """Randomly pick a duration based on the gumbel_r distribution fitted for
    durations from self to the destination station. If an unprecedented
    destination is selected, a random gumbel is picked from self."""
    if dest_id in self._duration_dict:
        duration = round(gumbel_r(*self._duration_dict[dest_id]).rvs(1)[0])
    else:
        print(
            f"Warning: Station {self._id} was asked to generate an "
            f"unprecedented duration for destination {dest_id}"
        )
        duration = round(
            gumbel_r(*choice(list(self._duration_dict.values()))).rvs(1)[0])
    duration = max(duration, 1)
    return int(duration)
def _fit(self):
    if self.fit_method == 'mle':
        _params = _st.gumbel_r.fit(self.data)
        self.params = OrderedDict()
        self.params["shape"] = 0
        self.params["location"] = _params[0]
        self.params["scale"] = _params[1]
    if self.fit_method == 'lmoments':
        _params = _lmdistr.gum.lmom_fit(self.data)
        self.params = OrderedDict()
        self.params["shape"] = 0
        self.params["location"] = _params['loc']
        self.params["scale"] = _params['scale']
    # METHOD OF MOMENTS FIT
    if self.fit_method == 'mom':
        _params = _gum_momfit(self.data)
        self.params = OrderedDict()
        self.params["shape"] = _params[0]
        self.params["location"] = _params[1]
        self.params["scale"] = _params[2]
    self.c = self.params['shape']
    self.loc = self.params['location']
    self.scale = self.params['scale']
    self.distr = _st.gumbel_r(loc=self.loc, scale=self.scale)
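# A minimal standalone sketch (not from the original source) comparing scipy's
# MLE fit with a simple method-of-moments estimate for gumbel_r; the _gum_momfit
# and _lmdistr helpers used above belong to the surrounding package and are not
# reproduced here.
import numpy as np
from scipy import stats

sample = stats.gumbel_r(loc=10.0, scale=2.0).rvs(size=5000, random_state=123)
loc_mle, scale_mle = stats.gumbel_r.fit(sample)          # maximum likelihood
scale_mom = sample.std() * np.sqrt(6) / np.pi            # beta from the std
loc_mom = sample.mean() - np.euler_gamma * scale_mom     # mu from the mean
print(loc_mle, scale_mle, loc_mom, scale_mom)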
def extremeDistribution_blockMaximaGumb(x, t, t_st):
    '''Approximates the short-term extreme distribution using the block
    maxima method and the Gumbel (right) distribution.

    Parameters
    ----------
    x : np.array
        Independent random variable (global peaks)
    t : np.array
        Time vector corresponding to x
    t_st : float
        Short-term period

    Returns
    -------
    stextreme_dist : scipy.stats rv_frozen
        Probability distribution of the short-term extreme.
    ste_params : np.array length 2
        Parameters of the short-term extreme distribution (gumbel_r)
        [loc, scale].
    block_maxima : np.array
        Block maxima (i.e. largest peak in each block).
    '''
    block_maxima = blockMaxima(x, t, t_st)
    ste_parameters = stats.gumbel_r.fit(block_maxima)
    stextreme_dist = stats.gumbel_r(loc=ste_parameters[0],
                                    scale=ste_parameters[1])
    return stextreme_dist, ste_parameters, block_maxima
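# A minimal usage sketch (not from the original source): fit a short-term
# extreme distribution to synthetic block maxima using scipy alone. The
# blockMaxima() helper used above is assumed to exist elsewhere, so here the
# block maxima are simply the largest peak in each fixed-length block.
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
peaks = rng.rayleigh(scale=1.0, size=10_000)          # synthetic global peaks
block_maxima = peaks.reshape(100, 100).max(axis=1)    # 100 blocks of 100 peaks
loc, scale = stats.gumbel_r.fit(block_maxima)
ste_dist = stats.gumbel_r(loc=loc, scale=scale)
print("90th-percentile short-term extreme:", ste_dist.ppf(0.9))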
def testBijector(self):
    with self.test_session():
        loc = 0.3
        scale = 5.
        bijector = Gumbel(loc=loc, scale=scale, event_ndims=1,
                          validate_args=True)
        self.assertEqual("gumbel", bijector.name)
        x = np.array([[[-3.], [0.], [0.5], [4.2], [12.]]], dtype=np.float32)
        # Gumbel distribution
        gumbel_dist = stats.gumbel_r(loc=loc, scale=scale)
        y = gumbel_dist.cdf(x).astype(np.float32)
        self.assertAllClose(y, bijector.forward(x).eval())
        self.assertAllClose(x, bijector.inverse(y).eval())
        self.assertAllClose(
            # We should lose a dimension from calculating the determinant of
            # the jacobian.
            np.squeeze(gumbel_dist.logpdf(x), axis=2),
            bijector.forward_log_det_jacobian(x).eval())
        self.assertAllClose(-bijector.inverse_log_det_jacobian(y).eval(),
                            bijector.forward_log_det_jacobian(x).eval(),
                            rtol=1e-4,
                            atol=0.)
def integrnd(uh, h, uhrv):
    # conditional Gumbel model for h given uh: mean = uh + 9, std = 20
    hmean = uh + 9.
    hstd = 20.
    beta = hstd / (np.pi / np.sqrt(6))
    mu = hmean - np.euler_gamma * beta
    hrv = stats.gumbel_r(loc=mu, scale=beta)
    pdfproduct = hrv.pdf(h) * uhrv.pdf(uh)
    return pdfproduct
def cdf_b(self, b1, b2):
    # make sure b1 <= b2
    if b1 > b2:
        b1, b2 = b2, b1
    if self.rtype == "n":
        return (norm(loc=self.args[0], scale=self.args[1]).cdf(b2)
                - norm(loc=self.args[0], scale=self.args[1]).cdf(b1))
    elif self.rtype == "ln":
        return (lognorm(s=self.args[1], scale=math.exp(self.args[0])).cdf(b2)
                - lognorm(s=self.args[1], scale=math.exp(self.args[0])).cdf(b1))
    elif self.rtype == "g":
        return (gumbel_r(loc=self.args[0], scale=self.args[1]).cdf(b2)
                - gumbel_r(loc=self.args[0], scale=self.args[1]).cdf(b1))
    elif self.rtype == "e":
        return (expon(loc=self.args[0], scale=self.args[1]).cdf(b2)
                - expon(loc=self.args[0], scale=self.args[1]).cdf(b1))
    elif self.rtype == "u":
        return (uniform_r(loc=self.args[0], scale=self.args[1]).cdf(b2)
                - uniform_r(loc=self.args[0], scale=self.args[1]).cdf(b1))
    else:
        print("distribution {0} not found".format(self.rtype))
        return "error - distribution"
def test_log_survival():
    """
    Test log_survival.
    """
    loc = np.array([0.0]).astype(np.float32)
    scale = np.array([[1.0], [2.0]]).astype(np.float32)
    gumbel_benchmark = stats.gumbel_r(loc, scale)
    expect_log_survival = gumbel_benchmark.logsf([1.0, 2.0]).astype(np.float32)
    log_survival = LogSF()
    output = log_survival(Tensor([1.0, 2.0], dtype=dtype.float32))
    tol = 5e-4
    assert (np.abs(output.asnumpy() - expect_log_survival) < tol).all()
def test_entropy():
    """
    Test entropy.
    """
    loc = np.array([0.0]).astype(np.float32)
    scale = np.array([[1.0], [2.0]]).astype(np.float32)
    gumbel_benchmark = stats.gumbel_r(loc, scale)
    expect_entropy = gumbel_benchmark.entropy().astype(np.float32)
    entropy = EntropyH()
    output = entropy()
    tol = 1e-6
    assert (np.abs(output.asnumpy() - expect_entropy) < tol).all()
def test_cdf():
    """
    Test cdf.
    """
    loc = np.array([0.0]).astype(np.float32)
    scale = np.array([[1.0], [2.0]]).astype(np.float32)
    gumbel_benchmark = stats.gumbel_r(loc, scale)
    expect_cdf = gumbel_benchmark.cdf([1.0, 2.0]).astype(np.float32)
    cdf = CDF()
    output = cdf(Tensor([1.0, 2.0], dtype=dtype.float32))
    tol = 2e-5
    assert (np.abs(output.asnumpy() - expect_cdf) < tol).all()
def test_log_likelihood():
    """
    Test log_pdf.
    """
    loc = np.array([0.0]).astype(np.float32)
    scale = np.array([[1.0], [2.0]]).astype(np.float32)
    gumbel_benchmark = stats.gumbel_r(loc, scale)
    expect_logpdf = gumbel_benchmark.logpdf([1.0, 2.0]).astype(np.float32)
    logprob = LogProb()
    output = logprob(Tensor([1.0, 2.0], dtype=dtype.float32))
    tol = 1e-6
    assert (np.abs(output.asnumpy() - expect_logpdf) < tol).all()
def __init__(self, rtype, args, name="default"):
    '''
    rv(rtype*, args*)

    rtype* is the distribution type ("n", "ln", "u", "g", "e") and args* is a
    vector with the distribution parameters, as follows:
    ------------------------------------------------------------
    normal (n):      [m, s**2]        !variance!
    lognormal (ln):  [m(n), s**2(n)]  !variance!
    uniform (u):     [lower border, upper border]
    gumbel (g):      [loc, scale]
    exponential (e): [loc, scale]
    ------------------------------------------------------------
    the letter in brackets is the corresponding shortcut for "rtype"
    ------------------------------------------------------------
    rv properties: "args", "rtype", "sd", "mean"
    methods: "sample"
    '''
    self.rtype = rtype
    self.args = args
    self.name = name
    if rtype == "n":
        self.mean = args[0]
        self.sd = args[1]
    elif rtype == "ln":
        self.mean = lognorm(s=args[1], scale=math.exp(args[0])).mean()
        self.sd = lognorm(s=args[1], scale=math.exp(args[0])).std()
    elif rtype == "g":
        # updated gumbel using scipy.stats mean/std
        self.mean = gumbel_r(loc=args[0], scale=args[1]).mean()
        self.sd = gumbel_r(loc=args[0], scale=args[1]).std()
    elif rtype == "u":
        self.mean = uniform_r(loc=args[0], scale=args[1]).mean()
        self.sd = uniform_r(loc=args[0], scale=args[1]).std()
    elif rtype == "e":
        self.mean = expon(loc=args[0], scale=args[1]).mean()
        self.sd = expon(loc=args[0], scale=args[1]).std()
    else:
        print("distribution {0} not found".format(rtype))
        return "error - distribution"
def test_pdf():
    """
    Test pdf.
    """
    loc = np.array([0.0]).astype(np.float32)
    scale = np.array([[1.0], [2.0]]).astype(np.float32)
    gumbel_benchmark = stats.gumbel_r(loc, scale)
    value = np.array([1.0, 2.0]).astype(np.float32)
    expect_pdf = gumbel_benchmark.pdf(value).astype(np.float32)
    pdf = Prob()
    output = pdf(Tensor(value, dtype=dtype.float32))
    tol = 1e-6
    assert (np.abs(output.asnumpy() - expect_pdf) < tol).all()
def __init__(self, oversampled_size=125, oversampling=5, gamma0=2.3,
             alpha0=1.9, sigma_log_gamma=0.15, max_gamma_factor=4.):
    self.x0, self.y0 = oversampled_size / 2, oversampled_size / 2
    self.x, self.y = np.mgrid[:oversampled_size, :oversampled_size]
    self.oversampling = oversampling
    self.c1 = 0.30
    self.sigma = sigma_log_gamma
    self.gamma0 = gamma0 * oversampling
    self.alpha = alpha0
    self.max_gamma = max_gamma_factor * oversampling
    # With a bit more tail than Gaussian; truncate at some max
    self.gscatter = stats.gumbel_r(loc=np.log10(self.gamma0), scale=self.sigma)
def ppf(self, p):
    if self.rtype == "n":
        return norm(loc=self.args[0], scale=self.args[1]).ppf(p)
    elif self.rtype == "ln":
        return lognorm(s=self.args[1], scale=math.exp(self.args[0])).ppf(p)
    elif self.rtype == "g":
        return gumbel_r(loc=self.args[0], scale=self.args[1]).ppf(p)
    elif self.rtype == "e":
        return expon(loc=self.args[0], scale=self.args[1]).ppf(p)
    elif self.rtype == "u":
        return uniform_r(loc=self.args[0], scale=self.args[1]).ppf(p)
    else:
        print("distribution {0} not found".format(self.rtype))
        return "error - distribution"
def pointintimePf(self, timepoint, register=True):
    # resistance array
    if timepoint == 0:
        rmean = self.resistance_mean[0]
        rcov = self.resistance_cov[0]
    else:
        rmean = self.resistance_mean[self.service_time == timepoint][0]
        rcov = self.resistance_cov[self.service_time == timepoint][0]
    if 'f' in self.comp_type:
        sd = M_DCDW_MEAN
    elif 's' in self.comp_type:
        sd = V_DCDW_MEAN
    elif 'd' in self.comp_type:
        sd = M_DCDW_DECK_MEAN
    rstd = rmean * rcov
    rmean = rmean - sd
    R = stats.lognorm(np.sqrt(np.log(1 + rcov**2)),
                      scale=rmean / np.sqrt(1 + rcov**2))
    # live load array
    if 'f' in self.comp_type.lower():
        nannual = LL_ARRIVAL_RATE
        slrv = stats.norm(loc=M_LLIM_MEAN, scale=M_LLIM_MEAN * M_LLIM_COV)
    elif 's' in self.comp_type.lower():
        nannual = LL_ARRIVAL_RATE
        slrv = stats.norm(loc=V_LLIM_MEAN, scale=V_LLIM_MEAN * V_LLIM_COV)
    elif 'd' in self.comp_type.lower():
        nannual = LL_ARRIVAL_RATE_DECK
        slrv = stats.norm(loc=M_LLIM_DECK_MEAN,
                          scale=M_LLIM_DECK_MEAN * M_LLIM_DECK_COV)
    # Gumbel approximation for the maximum live load over nannual events
    loc = slrv.ppf(1 - 1. / nannual)
    scale = 1. / (nannual * slrv.pdf(loc))
    SL = stats.gumbel_r(loc=loc, scale=scale)
    mean = SL.mean()
    rvs = [R, SL]
    corr = np.eye(2)
    probdata = ProbData(names=['R', 'SL'], rvs=rvs, corr=corr,
                        startpoint=[rmean, mean], nataf=False)

    def gf1(x, param=None):
        return x[0] - x[1]

    def dgdq1(x, param=None):
        dgd1 = 1.
        dgd2 = -1.
        return [dgd1, dgd2]

    analysisopt = AnalysisOpt(gradflag='DDM', recordu=False, recordx=False,
                              flagsens=False, verbose=False)
    gfunc = Gfunc(gf1, dgdq1)
    formBeta = CompReliab(probdata, gfunc, analysisopt)
    formresults = formBeta.form_result()
    return formresults.pf1
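# A small standalone check (not from the original source) of the asymptotic
# approximation used above for the maximum live load: for the maximum of
# nannual iid draws from slrv, take loc = slrv.ppf(1 - 1/nannual) and
# scale = 1 / (nannual * slrv.pdf(loc)); the values here are illustrative only.
import numpy as np
from scipy import stats

nannual = 1000
slrv = stats.norm(loc=100.0, scale=10.0)
loc = slrv.ppf(1 - 1. / nannual)
scale = 1. / (nannual * slrv.pdf(loc))
SL = stats.gumbel_r(loc=loc, scale=scale)

annual_max = slrv.rvs(size=(2000, nannual), random_state=0).max(axis=1)
print("empirical mean/std of the maximum:", annual_max.mean(), annual_max.std())
print("Gumbel approximation mean/std:   ", SL.mean(), SL.std())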
def __init__(self, mean, stdev, dtype='normal', weib_loc=0):
    if dtype == 'normal':
        self.dist = ss.norm(loc=mean, scale=stdev)
    elif dtype == 'gumbel_r':
        # moment matching: scale (beta) and location (mu) from mean and stdev
        beta = stdev * sqrt(6) / pi
        mu = mean - euler_gamma * beta
        self.dist = ss.gumbel_r(loc=mu, scale=beta)
    elif dtype == 'gumbel_l':
        beta = stdev * sqrt(6) / pi
        mu = mean + euler_gamma * beta
        self.dist = ss.gumbel_l(loc=mu, scale=beta)
    elif dtype == 'weibull':
        self.dist = weibull(mean, stdev, weib_loc)
    else:
        print('Error dtype.')
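# A minimal check (not from the original source) that the moment matching used
# above reproduces the requested mean and standard deviation:
#   beta = stdev * sqrt(6) / pi,  mu = mean - euler_gamma * beta
import numpy as np
from scipy import stats

mean, stdev = 50.0, 20.0
beta = stdev * np.sqrt(6) / np.pi
mu = mean - np.euler_gamma * beta
g = stats.gumbel_r(loc=mu, scale=beta)
assert np.isclose(g.mean(), mean) and np.isclose(g.std(), stdev)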
def test_basics():
    """
    Test mean/standard deviation/mode.
    """
    basics = Basics()
    mean, sd, mode = basics()
    loc = np.array([0.0]).astype(np.float32)
    scale = np.array([[1.0], [2.0]]).astype(np.float32)
    gumbel_benchmark = stats.gumbel_r(loc, scale)
    expect_mean = gumbel_benchmark.mean().astype(np.float32)
    expect_sd = gumbel_benchmark.std().astype(np.float32)
    expect_mode = np.array([[0.0], [0.0]]).astype(np.float32)
    tol = 1e-6
    assert (np.abs(mean.asnumpy() - expect_mean) < tol).all()
    assert (np.abs(mode.asnumpy() - expect_mode) < tol).all()
    assert (np.abs(sd.asnumpy() - expect_sd) < tol).all()
def __init__(self, *args):
    self.dist_func = []
    mean = []
    std = []
    self.dist_name = []
    for dist, mu, sig in args:
        if dist == 'lognorm':
            """scipy lognormal
            Y -> LN(mu_Y, sig_Y)
            ln(Y) -> N(mu_lnY, sig_lnY)
            mu_lnY = ln(mu_Y) - 0.5(sig_lnY**2)
            sig_lnY = sqrt(ln(1 + sig_Y**2/mu_Y**2))
            s = sig_lnY
            scale = exp(mu_lnY)
            """
            s = np.sqrt(np.log(1 + (sig**2) / mu**2))
            scale = np.exp(np.log(mu) - .5 * s**2)
            self.dist_func.append(stats.lognorm(s=s, scale=scale))
        elif dist == 'gumbel_r':
            """scipy gumbel right skew aka extreme value type I
            f(x) = exp(-(x-loc)1/scale) exp(-exp(-(x-loc)1/scale))
            1/scale = a = sqrt(pi**2/(6*sig**2))
            loc = u = mu - 0.5772/a
            """
            a = np.sqrt(np.pi**2 / (6 * sig**2))
            u = mu - 0.5772 / a
            self.dist_func.append(stats.gumbel_r(loc=u, scale=1 / a))
        else:
            self.dist_func.append(getattr(stats, dist)(loc=mu, scale=sig))
        self.dist_name.append(dist)
        mean.append(mu)
        std.append(sig)
    self.mean = np.array(mean)
    self.std = np.array(std)
    self.rho = np.identity(len(args))
def __init__(self, location, scale_parameter):
    if location is None:
        self.location = 0.0
    else:
        self.location = location
    if scale_parameter is None:
        self.scale_parameter = 1.0
    else:
        self.scale_parameter = scale_parameter
    self.bounds = np.array([-np.inf, np.inf])
    if self.scale_parameter < 0:
        raise ValueError('Invalid parameter in Gumbel distribution. '
                         'Scale should be positive.')
    self.parent = gumbel_r(loc=self.location, scale=self.scale_parameter)
    self.mean, self.variance, self.skewness, self.kurtosis = \
        self.parent.stats(moments='mvsk')
    self.x_range_for_pdf = np.linspace(self.location - 10.0,
                                       20.0 + self.location,
                                       RECURRENCE_PDF_SAMPLES)
def test_pce_for_gumbel_variable(self):
    degree = 3
    mean, std = 1e4, 7.5e3
    beta = std * np.sqrt(6) / np.pi
    mu = mean - beta * np.euler_gamma
    rv1 = gumbel_r(loc=mu, scale=beta)
    assert np.allclose(rv1.mean(), mean) and np.allclose(rv1.std(), std)
    rv2 = lognorm(1)
    for rv in [rv2, rv1]:
        print(rv.dist.name)
        ncoef = degree + 1
        var_trans = AffineRandomVariableTransformation([rv])
        poly = PolynomialChaosExpansion()
        poly_opts = define_poly_options_from_variable_transformation(
            var_trans)
        poly_opts['numerically_generated_poly_accuracy_tolerance'] = 1e-9
        poly.configure(poly_opts)
        poly.set_indices(np.arange(degree + 1)[np.newaxis, :])
        poly.set_coefficients(np.ones((poly.indices.shape[1], 1)))

        def integrand(x):
            p = poly.basis_matrix(x[np.newaxis, :])
            G = np.empty((x.shape[0], p.shape[1]**2))
            kk = 0
            for ii in range(p.shape[1]):
                for jj in range(p.shape[1]):
                    G[:, kk] = p[:, ii] * p[:, jj]
                    kk += 1
            return G * rv.pdf(x)[:, None]

        lb, ub = rv.interval(1)
        interval_size = rv.interval(0.99)[1] - rv.interval(0.99)[0]
        interval_size *= 10
        from pyapprox.utilities import \
            integrate_using_univariate_gauss_legendre_quadrature_unbounded
        res = \
            integrate_using_univariate_gauss_legendre_quadrature_unbounded(
                integrand, lb, ub, 10, interval_size=interval_size,
                verbose=0, max_steps=10000)
        res = np.reshape(
            res, (poly.indices.shape[1], poly.indices.shape[1]), order='C')
        print(res - np.eye(degree + 1))
        assert np.allclose(res, np.eye(degree + 1), atol=1e-6)
def testBijector(self):
    loc = 0.3
    scale = 5.
    bijector = tfb.Gumbel(loc=loc, scale=scale, validate_args=True)
    self.assertStartsWith(bijector.name, "gumbel")
    x = np.array([[[-3.], [0.], [0.5], [4.2], [12.]]], dtype=np.float32)
    # Gumbel distribution
    gumbel_dist = stats.gumbel_r(loc=loc, scale=scale)
    y = gumbel_dist.cdf(x).astype(np.float32)
    self.assertAllClose(y, self.evaluate(bijector.forward(x)))
    self.assertAllClose(x, self.evaluate(bijector.inverse(y)))
    self.assertAllClose(
        np.squeeze(gumbel_dist.logpdf(x), axis=-1),
        self.evaluate(bijector.forward_log_det_jacobian(x, event_ndims=1)))
    self.assertAllClose(
        self.evaluate(-bijector.inverse_log_det_jacobian(y, event_ndims=1)),
        self.evaluate(bijector.forward_log_det_jacobian(x, event_ndims=1)),
        rtol=1e-4,
        atol=0.)
def __init__(self, location, scale_parameter):
    self.scale_parameter = scale_parameter
    self.location = location
    if self.scale_parameter is not None:
        self.bounds = np.array([-np.inf, np.inf])
        if self.scale_parameter > 0:
            mean, var, skew, kurt = gumbel_r.stats(
                loc=self.location, scale=self.scale_parameter, moments='mvsk')
            self.parent = gumbel_r(loc=self.location,
                                   scale=self.scale_parameter)
            self.mean = mean
            self.variance = var
            self.skewness = skew
            self.kurtosis = kurt
            self.x_range_for_pdf = np.linspace(self.location - 10.0,
                                               20.0 + self.location,
                                               RECURRENCE_PDF_SAMPLES)
def dist_generator(leaf, trial_number, coefs):
    """
    Args:
        leaf: terminal node ID
        trial_number: number of simulations
        coefs: parameters of the Gumbel distribution for each leaf

    Returns:
        an array of length trial_number where each value is a random draw
        from the distribution of the leaf
    """
    g_model = gumbel_r(loc=coefs['loc'][leaf], scale=coefs['scale'][leaf])
    sim_results = g_model.rvs(size=trial_number)
    # Find and replace the non-positive samples.
    for i in range(trial_number):
        if sim_results[i] <= 0:
            sim_results[i] = g_model.rvs(size=1)[0]
    return np.ceil(sim_results)
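# A minimal usage sketch (not from the original source); the coefs table below
# is hypothetical, mapping two leaf IDs to fitted Gumbel parameters.
import numpy as np
from scipy.stats import gumbel_r

coefs = {'loc': {0: 5.0, 1: 12.0}, 'scale': {0: 1.5, 1: 3.0}}
draws = dist_generator(leaf=1, trial_number=1000, coefs=coefs)
print(draws.min(), draws.mean(), draws.max())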
def __init__(self, mu, beta):
    """
    Parameters
    ----------
    mu : float
        Location parameter
    beta : float, positive
        Scale parameter
    """
    assert beta > 0, "scale parameter must be positive"
    # Parameters
    self.mu = mu
    self.beta = beta
    # Scipy backend
    self.sp = gumbel_r(loc=mu, scale=beta)
    # Initialize super
    super().__init__()
def testBijector(self):
    with self.test_session():
        loc = 0.3
        scale = 5.
        bijector = Gumbel(loc=loc, scale=scale, validate_args=True)
        self.assertEqual("gumbel", bijector.name)
        x = np.array([[[-3.], [0.], [0.5], [4.2], [12.]]], dtype=np.float32)
        # Gumbel distribution
        gumbel_dist = stats.gumbel_r(loc=loc, scale=scale)
        y = gumbel_dist.cdf(x).astype(np.float32)
        self.assertAllClose(y, bijector.forward(x).eval())
        self.assertAllClose(x, bijector.inverse(y).eval())
        self.assertAllClose(
            np.squeeze(gumbel_dist.logpdf(x), axis=-1),
            bijector.forward_log_det_jacobian(x, event_ndims=1).eval())
        self.assertAllClose(
            -bijector.inverse_log_det_jacobian(y, event_ndims=1).eval(),
            bijector.forward_log_det_jacobian(x, event_ndims=1).eval(),
            rtol=1e-4,
            atol=0.)
def intg2h_mc(uhrv, trunclb, truncub, hbin, abstol=1e-12, reltol=1e-8,
              nsmp=int(1e5)):
    uhlb = trunclb[0]
    uhub = truncub[0]
    hlb = hbin[0]
    hub = hbin[1]
    uhsmp = uhrv.rvs(size=nsmp)
    hmeansmp = uhsmp + 9.
    pdfsum = 0.
    for hmean in hmeansmp:
        hstd = 20.
        beta = hstd / (np.pi / np.sqrt(6))
        mu = hmean - np.euler_gamma * beta
        hrv = stats.gumbel_r(loc=mu, scale=beta)
        pdfsum += hrv.cdf(hub) - hrv.cdf(hlb)
    p = pdfsum / nsmp
    return p
def adaptive_integrate(f1, f2, key, value):
    '''inputs:
        f1: function 1 of x, function string
        f2: function 2 of x, function string
        key: distribution type of random variable, string
        value: parameters of random distribution, tuple
    outputs:
        y: integral value
    '''
    if key.startswith('Uniform'):
        # stats.uniform is defined on [0, 1]; convert it to [-1, 1] for the
        # definition of the Legendre basis: stats.uniform(location, scale).
        # Arbitrary distribution types can be added later.
        f_distr = stats.uniform(-1, 2)
        f0 = lambda x: f_distr.pdf(x)
        f = lambda x: f1(x) * f2(x) * f0(x)
        y = integrate.quad(f, -1, 1)
    elif key.startswith('Gaussian'):
        # standard normal, for the Hermite polynomial basis; arbitrary normal
        # distributions can be added later
        f_distr = stats.norm(0, 1)
        f0 = lambda x: f_distr.pdf(x)
        f = lambda x: f1(x) * f2(x) * f0(x)
        y = integrate.quad(f, -npy.inf, npy.inf)
    elif key.startswith('Gamma'):
        # stats.gamma accepts one shape argument while UQLab uses two
        # parameters; location and scale make them consistent.
        # value[0]: lambda, value[1]: k (a for stats.gamma);
        # stats.gamma uses "beta" (scale) instead of "lambda"
        a = value[1]
        loc = 0
        scale = 1. / value[0]
        f_distr = stats.gamma(a, loc, scale)
        f0 = lambda x: f_distr.pdf(x)
        f = lambda x: f1(x) * f2(x) * f0(x)
        y = integrate.quad(f, 0, npy.inf)
    elif key.startswith('Beta'):
        # value[0]: alpha, value[1]: beta; no "loc" or "scale" needed,
        # always in the range [0, 1]
        alpha = value[0]
        beta = value[1]
        f_distr = stats.beta(alpha, beta)
        f0 = lambda x: f_distr.pdf(x)
        f = lambda x: f1(x) * f2(x) * f0(x)
        y = integrate.quad(f, 0, 1)
    elif key.startswith('Exponential'):
        # value: lambda
        loc = 0
        scale = 1. / value
        f_distr = stats.expon(loc, scale)
        f0 = lambda x: f_distr.pdf(x)
        f = lambda x: f1(x) * f2(x) * f0(x)
        y = integrate.quad(f, 0, npy.inf)
    elif key.startswith('Lognormal'):
        # UQLab uses Hermite polynomials for the lognormal and converts U to X
        # with exp(U); an arbitrary polynomial basis can also be specified, as
        # done here. value[0]: mu, value[1]: sigma
        s = value[1]
        loc = 0
        scale = npy.exp(value[0])
        f_distr = stats.lognorm(s, loc, scale)
        f0 = lambda x: f_distr.pdf(x)
        f = lambda x: f1(x) * f2(x) * f0(x)
        y = integrate.quad(f, 0, npy.inf)
    elif key.startswith('Gumbel'):
        # value[0]: mu (location), value[1]: beta (scale)
        loc = value[0]
        scale = value[1]
        f_distr = stats.gumbel_r(loc, scale)
        f0 = lambda x: f_distr.pdf(x)
        f = lambda x: f1(x) * f2(x) * f0(x)
        y = integrate.quad(f, -npy.inf, npy.inf)
    elif key.startswith('Weibull'):
        # value[0]: lambda (scale), value[1]: k (shape)
        k = value[1]
        loc = 0
        scale = value[0]
        f_distr = stats.weibull_min(k, loc, scale)
        f0 = lambda x: f_distr.pdf(x)
        f = lambda x: f1(x) * f2(x) * f0(x)
        y = integrate.quad(f, 0, npy.inf)
    elif key.startswith('Triangular'):
        # value: c; no "loc" and "scale" needed, always in the range [0, 1]
        c = value
        f_distr = stats.triang(c)
        f0 = lambda x: f_distr.pdf(x)
        f = lambda x: f1(x) * f2(x) * f0(x)
        y = integrate.quad(f, 0, 1)
    elif key.startswith('Logistic'):
        # value[0]: location, value[1]: scale
        loc = value[0]
        scale = value[1]
        f_distr = stats.logistic(loc, scale)
        f0 = lambda x: f_distr.pdf(x)
        f = lambda x: f1(x) * f2(x) * f0(x)
        y = integrate.quad(f, -npy.inf, npy.inf)
    elif key.startswith('Laplace'):
        # value[0]: location, value[1]: scale
        loc = value[0]
        scale = value[1]
        f_distr = stats.laplace(loc, scale)
        f0 = lambda x: f_distr.pdf(x)
        f = lambda x: f1(x) * f2(x) * f0(x)
        y = integrate.quad(f, -npy.inf, npy.inf)
    else:
        print('other types of statistical distributions are coming soon ...')
    return y[0]
plt.plot(t, gumbel_cdf(t), "k--", label="Gumbel")
plt.legend()

#%% Convergence of (M-b)/a towards standard Gumbel for pdf
sigma = 1.0
t = np.linspace(0, 8, 1000)
plt.figure()
for n in [10, 100, 1000, 10000]:
    a = sigma / np.sqrt(2 * np.log(n))
    b = sigma * np.sqrt(2 * np.log(n))
    plt.plot(t, a * np.exp(maxrayleigh_logpdf(a * t + b, n, sigma)),
             label=f"n={n}")
plt.plot(t, gumbel_pdf(t), "k--", label="Gumbel")
plt.legend()

#%% Convergence of M towards Gumbel for pdf
sigma = 1.0
t = np.linspace(0, 8, 1000)[1:]
plt.figure()
for n in [10, 100, 1000, 10000]:
    a = sigma / np.sqrt(2 * np.log(n))
    b = sigma * np.sqrt(2 * np.log(n))
    plt.plot(t, np.exp(maxrayleigh_logpdf(t, n, sigma)), label=f"n={n}")
    plt.plot(t, stats.gumbel_r(loc=b, scale=a).pdf(t), "k--",
             label=f"Gumbel n={n}")
plt.legend()
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 05 13:24:41 2015

@author: rarossi
"""
import scipy as sp
from scipy import stats


def printppf(model, rv):
    print('%s\t%.2f\t%.2f\t%.2f\t%.2f' % (model, rv.ppf(0.9), rv.ppf(0.99),
                                          rv.ppf(0.999), rv.ppf(0.9999)))


data = sp.loadtxt('data25h.txt', skiprows=1, usecols=(1,))
data -= data.mean()
print('Model\t90%\t99%\t99.9%\t99.99%')

l, s = stats.rayleigh.fit(data)
ray = stats.rayleigh(scale=s, loc=l)
printppf('Rayleigh', ray)

c, l, s = stats.weibull_max.fit(data)
wei = stats.weibull_max(scale=s, loc=l, c=c)
printppf('Weibull', wei)

l, s = stats.gumbel_r.fit(data)
gum = stats.gumbel_r(scale=s, loc=l)
printppf('Gumbel', gum)
gumbel = best_fit(tail)
loc, scale = gumbel.fit(tail)
mygl = gumbel(loc=loc, scale=scale)
plt.subplot(222)
stats.probplot(tail, dist=mygl, plot=plt)
title('best fit gumbel')

loc, scale = stats.gumbel_l.fit(tail)
mygl = stats.gumbel_l(loc=loc, scale=scale)
plt.subplot(223)
stats.probplot(tail, dist=mygl, plot=plt)
title('gumbel l')

loc, scale = stats.gumbel_r.fit(tail)
mygr = stats.gumbel_r(loc=loc, scale=scale)
plt.subplot(224)
stats.probplot(tail, dist=mygr, plot=plt)
title('gumbel r')

#import pandas
#
##list with the path to various results files from repeated lowering analyses
#with open('list_results.txt', 'r') as pf:
#    list_results = pf.readlines()
#
##write statistics to this file
#global_statistics_file = open('global_statistics.txt', 'w')
#global_statistics_file.write('File \t Total Cases \t Max \t Min \t Max_l \t Max_r \t ')
#global_statistics_file.write('Min_l \t Min_r \t Max_bad \t Min_bad\n')
#
                        2000, retstep=True)
    confidence = 1.0 - pfail(load_distro.pdf, res_distro.cdf, x, dx)
    return confidence - conf_target


# %%
if __name__ == '__main__':
    # input data
    conf_target = 0.9   # confidence level of non-failure
    load_loc = 100      # location parameter for the load distribution
    load_scale = 5      # scale parameter for the load distribution
    res_scale = 3.5     # scale parameter for the resistance distribution
    eps = 1e-8          # domain = pdf > eps, for load and resistance

    # frozen load distribution
    load_distro = ss.gumbel_r(loc=load_loc, scale=load_scale)

    # finds the location parameter for the resistance distribution that
    # gives the required conf_target
    res_loc = sp.optimize.bisect(optimize_loc, load_loc,
                                 load_distro.ppf(1 - eps),
                                 args=(res_scale, load_distro, conf_target,
                                       eps))

    # frozen resistance distribution
    res_distro = ss.gumbel_l(loc=res_loc, scale=res_scale)

    # recalculates the domain and the confidence level
    x, dx = np.linspace(min(load_distro.ppf(eps), res_distro.ppf(eps)),
                        max(load_distro.ppf(1 - eps), res_distro.ppf(1 - eps)),
                        200, retstep=True)
    confidence = 1.0 - pfail(load_distro.pdf, res_distro.cdf, x, dx)

    # %% plotting
    plt.plot(x, load_distro.pdf(x), label='load pdf')
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 31 13:53:38 2016

@author: rarossi
"""
from matplotlib import pyplot as plt
from scipy import stats as ss
from numpy import linspace

g = ss.gumbel_r(loc=0, scale=1)
n = 1000
x = linspace(-2, 5, n)
plt.figure(num=None, figsize=(9, 4), dpi=80, facecolor='w', edgecolor='k')
plt.plot(x, g.pdf(x), label='pdf', lw=3)
plt.plot(x, 0.5 * g.cdf(x), label='cdf', lw=3)
plt.xticks([])
plt.yticks([])
plt.xlabel('variate')
plt.ylabel('probability')
plt.legend(loc='best')
def compare_to_blank(blank_model_size, p_val=0.05, sparse_rounds=False,
                     interactome_interface_instance=None):
    """
    Recovers the statistics on the circulation nodes and shows the visual of a
    circulation system. There is no issue with using the same interactome
    interface instance, because they are forked when threads are generated and
    will not interfere.

    :param blank_model_size: the number of uniprots in the blank model
    :param p_val: desired p_value for the returned terms
    :param sparse_rounds: if set to a number, sparse computation technique
        would be used with the number of rounds equal to the integer value of
        that argument
    :param interactome_interface_instance:
    :return: None if no significant nodes, the node and group characteristic
        dictionaries otherwise
    """
    def get_max_for_each_degree(sample_sub_arrray):
        degrees = np.unique(sample_sub_arrray[1, :])
        max_array = []
        for degree in degrees:
            filter = sample_sub_arrray[1, :] == degree
            max_array.append([sample_sub_arrray[0, filter].max(), degree])
        m_arr = np.array(max_array)
        return m_arr.T

    if interactome_interface_instance is None:
        interactome_interface_instance = InteractomeInterface(True, True)
        interactome_interface_instance.fast_load()

    md5_hash = interactome_interface_instance.md5_hash()

    background_sub_array_list = []
    max_sub_array_list = []
    count = 0

    log.info("looking to test against:"
             "\t size: %s \t sys_hash: %s \t sparse_rounds: %s"
             % (blank_model_size, md5_hash, sparse_rounds))

    log.info("samples found to test against:\t %s" %
             interactome_rand_samp_db.find({'size': blank_model_size,
                                            'sys_hash': md5_hash,
                                            'sparse_rounds': sparse_rounds}
                                           ).count())

    for i, sample in enumerate(interactome_rand_samp_db.find(
            {'size': blank_model_size,
             'sys_hash': md5_hash,
             'sparse_rounds': sparse_rounds})):

        _, node_currents = pickle.loads(sample['currents'])
        dictionary_system = interactome_interface_instance.format_node_props(
            node_currents, limit=0)
        background_sub_array = list(dictionary_system.values())
        background_sub_array_list.append(np.array(background_sub_array).T)
        max_arr = get_max_for_each_degree(np.array(background_sub_array).T)
        max_sub_array_list.append(max_arr)
        count = i

    # This part declares the pre-operators required for the verification of a
    # real sample
    background_array = np.concatenate(tuple(background_sub_array_list), axis=1)
    max_array = np.concatenate(tuple(max_sub_array_list), axis=1)

    node_currents = interactome_interface_instance.node_current
    dictionary_system = interactome_interface_instance.format_node_props(
        node_currents)
    curr_inf_conf_tot = np.array(
        [[int(key)] + list(val) for key, val in dictionary_system.items()]).T
    node_ids, query_array = (curr_inf_conf_tot[0, :],
                             curr_inf_conf_tot[(1, 2), :])

    log.info("stats on %s samples" % count)

    background_density = kde_compute(background_array[(1, 0), :], 50, count)
    base_bi_corr = background_array[(0, 1), :]

    r_rels = []
    r_std_nodes = []

    # TODO: idea for the improved statistics, cluster a test node of degree k
    # with 100 nodes with closest degrees

    samples_scatter_and_hist(background_array, query_array)

    degrees = np.unique(query_array[1, :])
    combined_p_vals = np.ones_like(query_array[1, :])

    for degree in degrees.tolist():
        filter = query_array[1, :] == degree
        entry = query_array[:, filter]
        background_set = background_array[:, background_array[1, :] == degree]
        max_set = max_array[:, max_array[1, :] == degree]

        # fit a Gumbel distribution to the per-degree maxima and use its
        # survival probability as the p-value of the observed flow
        params = gumbel_r.fit(max_set[0, :])
        arg = params[:-2]
        mu = params[-2]
        beta = params[-1]
        frozen_gumbel = gumbel_r(loc=mu, scale=beta)
        p_vals = 1 - frozen_gumbel.cdf(entry[0, :])
        combined_p_vals[filter] = p_vals
        # TODO: insert into appropriate locations => we will assume that the
        # order is preserved
        # samples_scatter_and_hist(max_set, entry)

    r_nodes = background_density(query_array[(1, 0), :])
    # this is currently used as a p-value, which is problematic.
    r_nodes = combined_p_vals

    for point in query_array.T:
        selector = np.logical_and(base_bi_corr[1, :] > point[1] * 0.9,
                                  base_bi_corr[1, :] < point[1] * 1.1)
        r_rels.append(point[0] / np.mean(base_bi_corr[0, selector]))
        r_std_nodes.append((point[0] - np.mean(base_bi_corr[0, selector]))
                           / np.std(base_bi_corr[0, selector]))

    r_rels = np.array(r_rels)
    r_std_nodes = np.array(r_std_nodes)

    not_random_nodes = [node_id for node_id
                        in node_ids[r_nodes < p_val].tolist()]

    # basically the second element below are the nodes that contribute to the
    # information flow through the node that is considered as non-random
    log.debug('debug, not random nodes: %s', not_random_nodes)
    log.debug('debug bulbs_id_disp_name: %s',
              list(interactome_interface_instance.
                   neo4j_id_2_display_name.items())[:10])

    node_char_list = [
        [int(nr_node_id),
         interactome_interface_instance.neo4j_id_2_display_name[nr_node_id]]
        + dictionary_system[nr_node_id]
        + r_nodes[node_ids == float(nr_node_id)].tolist()
        for nr_node_id in not_random_nodes]

    nodes_dict = np.hstack((node_ids[:, np.newaxis], r_nodes[:, np.newaxis],
                            r_rels[:, np.newaxis], r_std_nodes[:, np.newaxis]))
    nodes_dict = dict((node[0], (node[1], node[2], node[3]))
                      for node in nodes_dict.tolist())
    # corresponds to the cases of super low flow - never significant
    nodes_dict = defaultdict(lambda: (1., 0., 0.), nodes_dict)

    # TODO: pull the groups corresponding to non-random associations.
    return sorted(node_char_list, key=lambda x: x[4]), nodes_dict
pl.plot(x3, y, '-', x3, y2, '.')
pl.plot(x4, y, '-', x4, y2, '.')
pl.legend(['train target', 'net output'])
pl.show()

n_serie = 5     # 1, 2, 3, 4, 5
n_sim = 10      # 10, 20, 30, 40, 50, 60
n_MC = 1000000
n_cMC = 10
ann_struct = '4i-1l'

x1 = norm(0.002244, 4.488e-5)    # mean, std
# zeta, scale=e^lambda
x2 = lognorm(0.06991447685, loc=0, scale=np.exp(12.96291461))
# zeta, scale=e^lambda
x3 = lognorm(0.05994610505, loc=0, scale=np.exp(10.00207896))
x4 = gumbel_r(loc=0.03909989358, scale=0.001559393602)   # loc=Mode, scale=beta

n_MC = 1000000
x1_MC = x1.rvs(n_MC)
x2_MC = x2.rvs(n_MC)
x3_MC = x3.rvs(n_MC)
x4_MC = x4.rvs(n_MC)
data = np.vstack((x1_MC, x2_MC, x3_MC, x4_MC)).T
data = (data - data.min(axis=0)) / \
    (data.max(axis=0) - data.min(axis=0)) * 2 - 1
rsm_MC = net.sim(data)
print(rsm_MC)
pf = np.sum(rsm_MC < 0) / float(n_MC)
        191.0, 192.4, 188.1, 189.8, 189.7, 191.0, 189.3, 188.9, 194.7, 187.9, 189.9, 191.2, 190.3, 192.1, 191.2, 189.8, 191.4, 192.2, 190.4, 188.8, 192.6, 191.0, 188.5, 190.8, 187.0, 190.8, 192.0, 190.7, 186.9, 188.7, 191.7, 190.9, 191.2, 191.3, 190.3, 188.9, 190.6, 191.0, 191.4, 188.7, 190.6, 189.9, 192.4, 189.6, 199.6, 193.6, 209.5, 191.7, 189.9, 189.5, 190.9, 189.6, 189.6, 198.0, 192.2, 189.7, 188.9, 188.8, 189.9, 189.3, 189.9, 191.0, 194.6, 190.1, 190.7, 187.7, 189.0, 190.3, 189.4, 190.2, 190.6, 192.1, 190.5, 186.6, 190.8, 192.1, 191.0, 189.9, 189.5, 189.2, 191.1, 189.9, 190.3, 190.2, 191.3, 188.2, 190.5, 189.3, 188.9, 188.7, 189.4, 191.3, 190.9, 189.9, 189.0, 191.0, 190.1, 191.3, 190.1, 189.2, 189.3, 189.8, 187.6, 189.9, 191.4, 189.5, 192.0, 188.5, 188.8, 188.9, 190.7, 189.3, 190.0, 192.4, 190.5, 189.7, 190.9, 191.2, 189.4, 190.6, 189.5, 192.0, 187.9, 189.6, 190.9, 193.0, 190.0, 191.6, 190.9, 188.5, 189.4, 189.5, 188.5, 189.7, 188.2, 190.8, 186.9, 189.3, 189.8, 188.6, 191.4, 190.9, 188.8, 191.9, 188.2, 191.7, 205.6, 204.6]

from scipy import stats

confidence = 0.9999

loc, scale = stats.gumbel_r.fit(data)
g = stats.gumbel_r(loc=loc, scale=scale)
print("Using all data %.2f" % g.ppf(confidence))

threshold = 192.0
fdata = [x for x in data if x >= threshold]
loc, scale = stats.gumbel_r.fit(fdata)
fg = stats.gumbel_r(loc=loc, scale=scale)
print("Using threshold %.2f" % fg.ppf(confidence))

plt.hist(data, 30, density=True)
plt.hist(fdata, 30, density=True)

print("""
Note that after the threshold the histogram is no longer valid for a Gumbel
distribution, so applying a threshold is not a good idea in this case.
""")
m.environment.WaveType = 'JONSWAP'
m.environment.UserSpecifiedRandomWaveSeeds = 'Yes'

seeds = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
sample3h = []
for seed in seeds:
    print('Running seed {0}'.format(seed))
    m.environment.WaveSeed = seed
    m.RunSimulation()
    sample3h.append(m.environment.AnalyseExtrema(
        'Elevation', period=1,
        objectExtra=of.oeEnvironment((0, 0, 0))).Max)

m.general.StageDuration[1] = 1800
seeds = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
         20, 21, 22, 23, 24, 25]
sample10min = []
for seed in seeds:
    print('Running seed {0}'.format(seed))
    m.environment.WaveSeed = seed
    m.RunSimulation()
    sample10min.append(m.environment.AnalyseExtrema(
        'Elevation', period=1,
        objectExtra=of.oeEnvironment((0, 0, 0))).Max)

print('P90 from 3h simulations: %.2f' %
      (ss.gumbel_r(*ss.gumbel_r.fit(sample3h)).ppf(0.9)))
# percentile correction: the 3h maximum is the maximum of six independent
# 30min maxima, so P(M_30min <= x) = P(M_3h <= x)**(1/6); looking up the
# 0.9**6 quantile of the 3h fit therefore gives the 30min P90
corr_p = 0.9**(180 / 30)
print('Corrected percentile for 30min operation: %.2f' % corr_p)
print('P%d from 3h simulations: %.2f' %
      (100 * corr_p, ss.gumbel_r(*ss.gumbel_r.fit(sample3h)).ppf(corr_p)))
print('P90 from 30min simulations: %.2f' %
      (ss.gumbel_r(*ss.gumbel_r.fit(sample10min)).ppf(0.9)))
logmean = np.log(150. / np.sqrt(1 + 0.2**2))
logstd = np.sqrt(np.log(1 + 0.2**2))
rv4 = stats.lognorm(logstd, scale=np.exp(logmean))
rv5 = stats.lognorm(logstd, scale=np.exp(logmean))
ur = stats.norm()
u1 = stats.norm()
u2 = stats.norm()
u3 = stats.norm()
u4 = stats.norm()
u5 = stats.norm()
mmean = 0.
mstd = 15.
# testing error, normal with mean=0, std=15 kNm
rvte = stats.norm(loc=mmean, scale=mstd)
# Gumbel load with mean 50 and std 50*0.4 (moment matching for loc and scale)
beta = (50. * 0.4) / (np.pi / np.sqrt(6))
mu = 50. - np.euler_gamma * beta
h = stats.gumbel_r(loc=mu, scale=beta)
# gamma load with mean 60 and cov 0.2
theta = (60 * 0.2)**2 / 60.
k = 60. / theta
v = stats.gamma(k, scale=theta)
rolnR = 0.3

# prior reliability using pyStRe
rvnames = ['ur', 'u1', 'u2', 'u3', 'u4', 'u5', 'h', 'v']
rvs = [ur, u1, u2, u3, u4, u5, h, v]
syspf = sys_prior(rvnames, rvs, rolnR)
print("Prior system failure probability is {}".format(syspf))
print("Prior system reliability is {}".format(stats.norm.ppf(1 - syspf)))

# network model
def _ppp_lieblein(self):
    """
    Lieblein-BLUE (Best Linear Unbiased Estimator) to obtain extreme values
    using a Type I (Gumbel) extreme value distribution. It approaches the
    calculation of extremes using a very classical methodology provided by
    Julius Lieblein. It exists just to check how several consultants made the
    calculation of wind speed extremes in the wind energy industry.

    It calculates extremes using an adjustment of Gumbel distribution using
    least squares fit and considering several probability-plotting positions
    used in the wild.

    **References**

        Lieblein J, (1974), 'Efficient methods of Extreme-Value Methodology',
        NBSIR 74-602, National Bureau of Standards, U.S. Department of
        Commerce.
    """
    # coefficients for samples below or equal to 16 elements
    ai = {
        'n = 02': [0.916373, 0.083627],
        'n = 03': [0.656320, 0.255714, 0.087966],
        'n = 04': [0.510998, 0.263943, 0.153680, 0.071380],
        'n = 05': [0.418934, 0.246282, 0.167609, 0.108824, 0.058350],
        'n = 06': [0.355450, 0.225488, 0.165620, 0.121054, 0.083522, 0.048867],
        'n = 07': [0.309008, 0.206260, 0.158590, 0.123223, 0.093747, 0.067331, 0.041841],
        'n = 08': [0.273535, 0.189428, 0.150200, 0.121174, 0.097142, 0.075904, 0.056132, 0.036485],
        'n = 09': [0.245539, 0.174882, 0.141789, 0.117357, 0.097218, 0.079569, 0.063400, 0.047957, 0.032291],
        'n = 10': [0.222867, 0.162308, 0.133845, 0.112868, 0.095636, 0.080618, 0.066988, 0.054193, 0.041748, 0.028929],
        'n = 11': [0.204123, 0.151384, 0.126522, 0.108226, 0.093234, 0.080222, 0.068485, 0.057578, 0.047159, 0.036886, 0.026180],
        'n = 12': [0.188361, 0.141833, 0.119838, 0.103673, 0.090455, 0.079018, 0.068747, 0.059266, 0.050303, 0.041628, 0.032984, 0.023894],
        'n = 13': [0.174916, 0.133422, 0.113759, 0.099323, 0.087540, 0.077368, 0.068264, 0.059900, 0.052047, 0.044528, 0.037177, 0.029790, 0.021965],
        'n = 14': [0.163309, 0.125966, 0.108230, 0.095223, 0.084619, 0.075484, 0.067331, 0.059866, 0.052891, 0.046260, 0.039847, 0.033526, 0.027131, 0.020317],
        'n = 15': [0.153184, 0.119314, 0.103196, 0.091384, 0.081767, 0.073495, 0.066128, 0.059401, 0.053140, 0.047217, 0.041529, 0.035984, 0.030484, 0.024887, 0.018894],
        'n = 16': [0.144271, 0.113346, 0.098600, 0.087801, 0.079021, 0.071476, 0.064771, 0.058660, 0.052989, 0.047646, 0.042539, 0.037597, 0.032748, 0.027911, 0.022969, 0.017653]
    }

    bi = {
        'n = 02': [-0.721348, 0.721348],
        'n = 03': [-0.630541, 0.255816, 0.374725],
        'n = 04': [-0.558619, 0.085903, 0.223919, 0.248797],
        'n = 05': [-0.503127, 0.006534, 0.130455, 0.181656, 0.184483],
        'n = 06': [-0.459273, -0.035992, 0.073199, 0.126724, 0.149534, 0.145807],
        'n = 07': [-0.423700, -0.060698, 0.036192, 0.087339, 0.114868, 0.125859, 0.120141],
        'n = 08': [-0.394187, -0.075767, 0.011124, 0.058928, 0.087162, 0.102728, 0.108074, 0.101936],
        'n = 09': [-0.369242, -0.085203, -0.006486, 0.037977, 0.065574, 0.082654, 0.091965, 0.094369, 0.088391],
        'n = 10': [-0.347830, -0.091158, -0.019210, 0.022179, 0.048671, 0.066064, 0.077021, 0.082771, 0.083552, 0.077940],
        'n = 11': [-0.329210, -0.094869, -0.028604, 0.010032, 0.035284, 0.052464, 0.064071, 0.071381, 0.074977, 0.074830, 0.069644],
        'n = 12': [-0.312840, -0.097086, -0.035655, 0.000534, 0.024548, 0.041278, 0.053053, 0.061112, 0.066122, 0.068357, 0.067671, 0.062906],
        'n = 13': [-0.298313, -0.098284, -0.041013, -0.006997, 0.015836, 0.032014, 0.043710, 0.052101, 0.057862, 0.061355, 0.062699, 0.061699, 0.057330],
        'n = 14': [-0.285316, -0.098775, -0.045120, -0.013039, 0.008690, 0.024282, 0.035768, 0.044262, 0.050418, 0.054624, 0.057083, 0.057829, 0.056652, 0.052642],
        'n = 15': [-0.273606, -0.098768, -0.048285, -0.017934, 0.002773, 0.017779, 0.028988, 0.037452, 0.043798, 0.048415, 0.051534, 0.053267, 0.053603, 0.052334, 0.048648],
        'n = 16': [-0.262990, -0.098406, -0.050731, -0.021933, -0.002167, 0.012270, 0.023168, 0.031528, 0.037939, 0.042787, 0.046308, 0.048646, 0.049860, 0.049912, 0.048602, 0.045207]
    }

    data = _np.sort(self.data)
    data = data ** self.preconditioning
    N = self.N

    # hyp and coeffs are used to calculate values for samples larger than 16
    # elements

    # Hypergeometric distribution function
    def hyp(n, m, i, t):
        bin1 = _fact(i) / (_fact(t) * _fact(i - t))
        bin2 = _fact(n - i) / (_fact(m - t) * _fact((n - i) - (m - t)))
        bin3 = _fact(n) / (_fact(m) * _fact(n - m))
        return bin1 * bin2 / bin3

    # Coefficients
    def coeffs(n, m):
        aip = []
        bip = []
        for i in range(n):
            a = 0
            b = 0
            for t in range(m):
                try:
                    a += (ai['n = {:02}'.format(m)][t] * ((t + 1) / (i + 1))
                          * hyp(n, m, i + 1, t + 1))
                    b += (bi['n = {:02}'.format(m)][t] * ((t + 1) / (i + 1))
                          * hyp(n, m, i + 1, t + 1))
                except:
                    pass
            aip.append(a)
            bip.append(b)
        return aip, bip

    def distr_params():
        if N <= 16:
            # parameters u and b in the paper
            mu = _np.sum(_np.array(ai['n = {:02}'.format(N)]) * data)
            sigma = _np.sum(_np.array(bi['n = {:02}'.format(N)]) * data)
        else:
            aip, bip = coeffs(N, 16)
            mu = _np.sum(_np.array(aip) * data)
            sigma = _np.sum(_np.array(bip) * data)
        return mu, sigma

    mu, sigma = distr_params()

    return_period = _np.arange(2, 100 + 1)
    P = (_np.arange(N) + 1) / (N + 1)
    Y = -_np.log(-_np.log(P))
    vref = (- sigma * _np.log(-_np.log(1 - 1 / return_period))
            + mu) ** (1 / self.preconditioning)

    self.results = {}
    self.results['Y'] = Y
    self.results['data'] = data
    self.results['Values for return period from 2 to 100 years'] = vref
    self.results['slope'] = sigma
    self.results['offset'] = mu

    self.c = 0
    self.loc = self.results['offset']
    self.scale = self.results['slope']
    self.distr = _st.gumbel_r(loc=self.loc, scale=self.scale)
def _calculate_values(self, how=None):
    data = _np.sort(self.data)
    data = data ** self.preconditioning
    N = self.N

    if how == 'Adamowski':
        # see De, M., 2000. A new unbiased plotting position formula for
        # gumbel distribution. Stochastic Envir. Res. Risk Asses., 14: 1-7.
        P = ((_np.arange(N) + 1) - 0.25) / (N + 0.5)
    if how == 'Beard':
        # see De, M., 2000. (reference as for 'Adamowski')
        P = ((_np.arange(N) + 1) - 0.31) / (N + 0.38)
    if how == 'Blom':
        # see Adeboye, O.B. and M.O. Alatise, 2007. Performance of probability
        # distributions and plotting positions in estimating the flood of
        # River Osun at Apoje Sub-basin, Nigeria. Agric. Eng. Int.: CIGR J.,
        # Vol. 9.
        P = ((_np.arange(N) + 1) - 0.375) / (N + 0.25)
    if how == 'Chegodayev':
        # see De, M., 2000. (reference as for 'Adamowski')
        P = ((_np.arange(N) + 1) - 0.3) / (N + 0.4)
    if how == 'Cunnane':
        # see Cunnane, C., 1978. Unbiased plotting positions: A review.
        # J. Hydrol., 37: 205-222.
        P = ((_np.arange(N) + 1) - 0.4) / (N + 0.2)
    if how == 'Gringorten':
        # see Adeboye, O.B. and M.O. Alatise, 2007. (reference as for 'Blom')
        P = ((_np.arange(N) + 1) - 0.44) / (N + 0.12)
    if how == 'Hazen':
        # see Adeboye, O.B. and M.O. Alatise, 2007. (reference as for 'Blom')
        P = ((_np.arange(N) + 1) - 0.5) / N
    if how == 'Hirsch':
        # see Jay, R.L., O. Kalman and M. Jenkins, 1998. Integrated planning
        # and management for Urban water supplies considering multi
        # uncertainties. Technical Report, Department of Civil and
        # Environmental Engineering, Universities of California.
        P = ((_np.arange(N) + 1) + 0.5) / (N + 1)
    if how == 'IEC56':
        # see Forthegill, J.C., 1990. Estimating the cumulative probability of
        # failure data points to be plotted on weibull and other probability
        # paper. Electr. Insulation Transact., 25: 489-492.
        P = ((_np.arange(N) + 1) - 0.5) / (N + 0.25)
    if how == 'Landwehr':
        # see Makkonen, L., 2008. Problem in the extreme value analysis.
        # Structural Safety, 30: 405-419.
        P = ((_np.arange(N) + 1) - 0.35) / N
    if how == 'Laplace':
        # see Jay, R.L., O. Kalman and M. Jenkins, 1998. (reference as for
        # 'Hirsch')
        P = ((_np.arange(N) + 1) + 1) / (N + 2)
    if how == 'McClung and Mears':
        # see Makkonen, L., 2008. (reference as for 'Landwehr')
        P = ((_np.arange(N) + 1) - 0.4) / N
    if how == 'Tukey':
        # see Makkonen, L., 2008. (reference as for 'Landwehr')
        P = ((_np.arange(N) + 1) - 1/3) / (N + 1/3)
    if how == 'Weibull':
        # see Hynman, R.J. and Y. Fan, 1996. Sample quantiles in statistical
        # packages. Am. Stat., 50: 361-365.
        P = (_np.arange(N) + 1) / (N + 1)

    Y = -_np.log(-_np.log(P))
    slope, offset = _np.polyfit(Y, data, 1)
    R2 = _np.corrcoef(Y, data)[0, 1] ** 2
    # fit = slope * Y + offset

    return_period = _np.arange(2, 101)
    vref = (- slope * _np.log(-_np.log(1 - 1 / return_period))
            + offset) ** (1 / self.preconditioning)

    self.results = {}
    self.results['data'] = data
    self.results['Y'] = Y
    self.results['Values for return period from 2 to 100 years'] = vref
    self.results['R2'] = R2
    self.results['slope'] = slope
    self.results['offset'] = offset

    self.c = 0
    self.loc = self.results['offset']
    self.scale = self.results['slope']
    self.distr = _st.gumbel_r(loc=self.loc, scale=self.scale)
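# A short standalone sketch (not from the original source) of the same
# plotting-position idea on synthetic data: Weibull positions P = i/(N+1),
# reduced variate Y = -ln(-ln P), and a least-squares line whose slope and
# offset play the roles of the Gumbel scale and location.
import numpy as np
from scipy import stats

data = np.sort(stats.gumbel_r(loc=30.0, scale=4.0).rvs(size=50, random_state=0))
N = len(data)
P = (np.arange(N) + 1) / (N + 1)
Y = -np.log(-np.log(P))
slope, offset = np.polyfit(Y, data, 1)
return_period = np.arange(2, 101)
vref = -slope * np.log(-np.log(1 - 1 / return_period)) + offset
print("scale ~", slope, "loc ~", offset, "100-year value ~", vref[-1])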
        sys.exit(0)

    return results


if __name__ == "__main__":
    from probdata import ProbData
    from analysisopt import AnalysisOpt
    from gfunc import Gfunc

    import scipy.stats as stats

    # uncorrelated case in example 5.11 in Nowak's book (2013)
    Z = stats.norm(loc=100.0, scale=100.0 * 0.04)
    Fy = stats.lognorm(np.sqrt(np.log(1 + 0.1**2)),
                       scale=40.0 / np.sqrt(1 + 0.1**2))
    M = stats.gumbel_r(loc=2000 - np.sqrt(6) * 200 / np.pi * np.euler_gamma,
                       scale=np.sqrt(6) * 200 / np.pi)
    rvs = [Z, Fy, M]
    corr = np.array([[1.0, 0.9, 0.0], [0.9, 1.0, 0.0], [0.0, 0.0, 1.0]])
    probdata = ProbData(names=["Z", "Fy", "M"], rvs=rvs, corr=corr,
                        startpoint=[100.0, 40.0, 2000.0], nataf=False)

    def gf1(x, param=None):
        return x[0] * x[1] - x[2]

    def dgdq1(x, param=None):
        dgd1 = x[1]
        dgd2 = x[0]
        dgd3 = -1.0
        return [dgd1, dgd2, dgd3]

    analysisopt = AnalysisOpt(gradflag="DDM", recordu=False, recordx=False,
                              flagsens=False, verbose=False)
    gfunc = Gfunc(gf1, dgdq1)
bestperiod = 1. / fbest[0]
bestperiod2 = 1. / fbest[1]
bestperiod3 = 1. / fbest[2]
bestperiod4 = 1. / fbest[3]

# bootstrap the periodogram peaks to build a null distribution of peak power
pbest_bootstrap = np.zeros(shape=(100, 2))
for index in range(pbest_bootstrap.shape[0]):
    P = np.random.permutation(len(mjd))
    my_per.set_data(mjd, mag[P], err[P])
    my_per.frequency_grid_evaluation(fmin=0.0, fmax=4.0, fresolution=1e-3)
    my_per.finetune_best_frequencies(fresolution=1e-4,
                                     n_local_optima=pbest_bootstrap.shape[1])
    _, pbest_bootstrap[index, :] = my_per.get_best_frequencies()

param = gumbel_r.fit(pbest_bootstrap.ravel())
rv = gumbel_r(loc=param[0], scale=param[1])
x = np.linspace(rv.ppf(0.001), rv.ppf(0.999), 100)
p_vals = [0.01, 0.05, 0.08]
sig1 = rv.ppf(1. - p_vals[0])
sig5 = rv.ppf(1. - p_vals[1])
sig8 = rv.ppf(1. - p_vals[2])

bestpower = pbest[0]
bestpower2 = pbest[1]
bestpower3 = pbest[2]
bestpower4 = pbest[3]
print(bestperiod, bestperiod2, bestperiod3, bestperiod4)
def _ppp_harris1996(self):
    """
    Review of the traditional Gumbel extreme value method for analysing
    yearly maximum windspeeds or similar data, with a view to improving the
    process. An improved set of plotting positions based on the mean values
    of the order statistics are derived, together with a means of obtaining
    the standard deviation of each position. This enables a fitting procedure
    using weighted least squares to be adopted, which gives results similar
    to the traditional Lieblein BLUE process, but with the advantages that it
    does not require tabulated coefficients, is available for any number of
    data up to at least 50, and provides a quantitative measure of goodness
    of fit.

    **References**

        Harris RI, (1996), 'Gumbel re-visited -- a new look at extreme value
        statistics applied to wind speeds', Journal of Wind Engineering and
        Industrial Aerodynamics, 59, 1-22.
    """
    data = _np.sort(self.data)[::-1]
    data = data ** self.preconditioning
    N = self.N

    ymean = _np.empty(N)
    ymean2 = _np.empty(N)
    variance = _np.empty(N)
    weight = _np.empty(N)

    def integ_ymean(x, N, NU):
        return -_np.log(-_np.log(x)) * (x ** (N - NU)) * ((1 - x) ** (NU - 1))

    def integ_ymean2(x, N, NU):
        return ((-_np.log(-_np.log(x))) ** 2) * (x ** (N - NU)) * ((1 - x) ** (NU - 1))

    for NU in range(1, N + 1):
        # calculation of ymean
        a = _fact(N) / (_fact(NU - 1) * _fact(N - NU))
        b, err = _integrate.quad(integ_ymean, 0, 1, args=(N, NU))
        ymean[NU - 1] = a * b
        # calculation of ymean2
        b, err = _integrate.quad(integ_ymean2, 0, 1, args=(N, NU))
        ymean2[NU - 1] = a * b
        # calculation of variance
        variance[NU - 1] = _np.sqrt(ymean2[NU - 1] - ymean[NU - 1] ** 2)

    # calculation of weights
    denominator = _np.sum(1 / variance ** 2)
    for NU in range(1, N + 1):
        weight[NU - 1] = (1 / variance[NU - 1] ** 2) / denominator

    # calculation of alpha
    # Numerator
    sum1 = _np.sum(weight * ymean * data)
    sum2 = _np.sum(weight * ymean)
    sum3 = _np.sum(weight * data)
    # Denominator
    sum4 = _np.sum(weight * (data ** 2))
    sum5 = sum3 ** 2
    # alpha
    alpha = (sum1 - sum2 * sum3) / (sum4 - sum5)

    # calculation of characteristic product
    pi_upper = alpha * sum3 - sum2

    # calculation of the extreme values for the return periods between 2 and
    # 100 years
    return_periods = _np.arange(2, 100 + 1)
    v_ext_tmp = [(-_np.log(-_np.log(1 - 1 / return_period)) + pi_upper) / alpha
                 for return_period in return_periods]
    v_ext = [v ** (1 / self.preconditioning) for v in v_ext_tmp]

    # Calculation of the residual std dev
    deviation = _np.sum(weight * ((ymean - alpha * data + pi_upper) ** 2))
    residual_stddev = _np.sqrt(deviation * N / (N - 2))

    self.results = {}
    self.results['Y'] = ymean
    self.results['weights'] = weight
    self.results['data'] = data
    self.results['Values for return period from 2 to 100 years'] = v_ext
    self.results['slope'] = 1. / alpha
    self.results['offset'] = pi_upper / alpha
    self.results['characteristic product'] = pi_upper
    self.results['alpha'] = alpha
    self.results['residual standard deviation'] = residual_stddev

    self.c = 0
    self.loc = self.results['offset']
    self.scale = self.results['slope']
    self.distr = _st.gumbel_r(loc=self.loc, scale=self.scale)
def get_y(params, x, tail):
    if tail == 'upper':
        return -np.log(-ss.gumbel_r(*params).logcdf(x))
    else:
        return -np.log(-ss.gumbel_l(*params).logsf(x))
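# A minimal usage sketch (not from the original source): fit the upper tail
# and evaluate the transformed exceedance measure defined above.
import numpy as np
from scipy import stats as ss

sample = ss.gumbel_r(loc=2.0, scale=0.5).rvs(size=500, random_state=0)
params_upper = ss.gumbel_r.fit(sample)
x = np.linspace(sample.min(), sample.max(), 5)
print(get_y(params_upper, x, tail='upper'))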