def cdf(self, strike, spot, texp, cp=1):
    """Evaluate a tail probability of an inverse-Gaussian law at strike / forward.

    The forward comes from ``self._fwd_factor(spot, texp)``. The distribution
    is ``invgauss(mu=s, scale=1/s)`` with ``s = exp(sigma**2 * texp) - 1``.

    Parameters
    ----------
    strike : strike price(s).
    spot : spot price(s), forwarded to ``self._fwd_factor``.
    texp : time to expiry.
    cp : sign flag; where ``cp > 0`` the survival function is returned,
        elsewhere the cumulative distribution function.

    Returns
    -------
    numpy.ndarray with the selected probability for each input.
    """
    forward, _discount, _unused = self._fwd_factor(spot, texp)
    shape = np.exp(self.sigma**2 * texp) - 1
    dist = spst.invgauss(mu=shape, scale=1 / shape)
    moneyness = strike / forward
    upper_tail = dist.sf(moneyness)
    lower_tail = dist.cdf(moneyness)
    return np.where(cp > 0, upper_tail, lower_tail)
def estimate_tweeide_logcdf_series(x, mu, phi, p):
    """Estimate the log-CDF for each element of x, mu, phi, and p.

    All four inputs are converted to arrays of at least one dimension and
    are indexed with the same boolean masks, so they must share one shape.

    Parameters
    ----------
    x : array
        The observed values. Must be non-negative.
    mu : array
        The fitted values. Must be positive.
    phi : array
        The scale parameter. Must be positive.
    p : array
        The Tweedie variance power. Handled cases are p == 0 (Gaussian),
        p == 1 (Poisson), 1 < p < 2, p == 2 (Gamma) and p == 3 (inverse
        Gaussian). Elements with any other p are left at 0.0.

    Returns
    -------
    logcdf : numpy.ndarray of float
        The log of the cumulative distribution function, one value per
        element of ``x``.
    """
    x = np.array(x, ndmin=1)
    mu = np.array(mu, ndmin=1)
    phi = np.array(phi, ndmin=1)
    p = np.array(p, ndmin=1)

    # Always allocate a float result: np.zeros_like(x) would inherit an
    # integer dtype from integer observations and truncate the log-CDF.
    logcdf = np.zeros(x.shape, dtype=float)

    # Gaussian (Normal)
    mask = p == 0
    if mask.any():
        logcdf[mask] = norm(loc=mu[mask],
                            scale=np.sqrt(phi[mask])).logcdf(x[mask])

    # Poisson
    mask = p == 1.
    if mask.any():
        logcdf[mask] = np.log(poisson(mu=mu[mask] / phi[mask]).cdf(x[mask]))

    # 1 < p < 2
    in_1to2 = (1 < p) & (p < 2)
    if in_1to2.any():
        positive = x > 0
        mask = in_1to2 & positive
        logcdf[mask] = logcdf_1to2(x[mask], mu[mask], phi[mask], p[mask])
        # For x <= 0 a closed-form expression in mu, phi and p is used.
        mask = in_1to2 & ~positive
        logcdf[mask] = -(mu[mask]**(2 - p[mask]) /
                         (phi[mask] * (2 - p[mask])))

    # Gamma
    mask = p == 2
    if mask.any():
        logcdf[mask] = gamma(a=1 / phi[mask],
                             scale=phi[mask] * mu[mask]).logcdf(x[mask])

    # Inverse Gaussian (Normal)
    mask = p == 3
    if mask.any():
        logcdf[mask] = invgauss(mu=mu[mask] * phi[mask],
                                scale=1 / phi[mask]).logcdf(x[mask])

    return logcdf
def generate_values(line, length_mean, length_std):
    """
    Draw a random-length sequence of values for one kmer.

    The sequence length is sampled from a normal distribution with mean
    length_mean and standard deviation length_std, truncated to an integer
    and clipped at zero. Each value is the sum of a Gaussian "level" draw and
    an inverse Gaussian "noise" draw.

    line - template row (e.g. a row of a pandas.DataFrame) providing the keys
           "level_mean", "level_stdv", "sd_mean" and "ig_lambda"
    length_mean - mean of the Gauss distribution the length is drawn from
    length_std - standard deviation of that distribution

    Description of columns in original template:
    kmer        The kmer being modelled.
    level_mean  The mean of a Gaussian distribution representing the current
                observed for this kmer.
    level_stdv  The standard deviation of the above Gaussian distribution.
    sd_mean     The mean of an inverse Gaussian distribution representing the
                noise observed for this kmer.
    sd_stdv     The standard deviation of the above inverse Gaussian
                distribution (sd_stdv == sqrt(sd_mean ^ 3 / ig_lambda)).
    ig_lambda   The lambda parameter of the above inverse Gaussian
                distribution.
    weight      Used internally for model training purposes.
    """
    level_dist = norm(line["level_mean"], line["level_stdv"])

    # scipy's invgauss takes mean / lambda as shape and lambda as scale.
    sd_mean = line["sd_mean"]
    ig_lambda = line["ig_lambda"]
    noise_dist = invgauss(sd_mean / ig_lambda, scale=ig_lambda)

    count = int(norm.rvs(loc=length_mean, scale=length_std))
    if count < 0:
        count = 0

    levels = level_dist.rvs(count)
    noise = noise_dist.rvs(count)
    return levels + noise
def log_event_sd_inv_gaussian_probability_match(self, event_sd, kmer):
    """Return the log-density of event_sd under the kmer's inv-gaussian model

    The mean and lambda for the kmer come from
    ``self.get_event_sd_inv_gaussian_parameters``; they are mapped to scipy's
    parameterization as shape = mean / lambda and scale = lambda.

    :param event_sd: sd of event
    :param kmer: kmer for model distribution selection
    :return: value of the inverse-Gaussian ``logpdf`` at ``event_sd``
    """
    mean, lam = self.get_event_sd_inv_gaussian_parameters(kmer)
    sd_model = invgauss(mean / lam, scale=lam)
    return sd_model.logpdf(event_sd)
def price(self, strike, spot, texp, cp=1):
    """Return the option price built from inverse-Gaussian tail probabilities.

    The forward and discount factor come from ``self._fwd_factor(spot, texp)``.
    The distribution is ``invgauss(mu=s, scale=1/s)`` with
    ``s = exp(sigma**2 * texp) - 1`` and ``kk = strike / forward``.

    Parameters
    ----------
    strike : strike price(s).
    spot : spot price(s), forwarded to ``self._fwd_factor``.
    texp : time to expiry.
    cp : sign flag; where ``cp > 0`` the value is
        ``cdf(1/kk) - kk * sf(kk)``, elsewhere ``kk * cdf(kk) - sf(1/kk)``.

    Returns
    -------
    numpy.ndarray: the selected value scaled by ``df * fwd``.
    """
    fwd, df, _unused = self._fwd_factor(spot, texp)
    shape = np.exp(self.sigma**2 * texp) - 1
    dist = spst.invgauss(mu=shape, scale=1 / shape)

    kk = strike / fwd
    value_if_positive = dist.cdf(1 / kk) - kk * dist.sf(kk)
    value_otherwise = kk * dist.cdf(kk) - dist.sf(1 / kk)
    undiscounted = np.where(cp > 0, value_if_positive, value_otherwise)
    return df * fwd * undiscounted
def test_invgauss_pdf():
    """Compare likelihoods.invgauss_logpdf with scipy's invgauss density.

    Ten random (a, v, sigma) triples are drawn; for each one the exponentiated
    log-density is checked against ``stats.invgauss`` on a fixed grid.
    """
    for _ in range(10):
        a = np.random.rand()*10.
        v = np.random.rand()*10.
        sigma = np.random.rand()*10.

        mu = a / v
        lam = a**2 / sigma**2

        x = np.linspace(0.1, 100, 1000)
        log_density = likelihoods.invgauss_logpdf(x, 0, a, v, sigma=sigma, p_outlier=0.)
        y1 = np.exp(log_density)

        # needs transform: https://github.com/scipy/scipy/issues/2367#issuecomment-17028905
        reference = stats.invgauss(mu/lam, loc=0, scale=lam)
        y2 = reference.pdf(x)

        np.testing.assert_array_almost_equal(y1, y2)
def _distDivergence(self, _model1, _model2):
    """
    Compute the divergence between the two probability models
    (Variation of Jensen Shannon)

    Every model is an inverse Gaussian given as (mu, loc, scale). Each model's
    pdf is sampled at POINT_DENSITY points between two of its own quantiles,
    and the samples are compared position by position.

    _model1: The basic model, indexable as (mu, loc, scale).
    _model2: The test models, a 2-D array with one (mu, loc, scale) row
             per model.
    RETURN: The divergence score, exp(-gJS / POINT_DENSITY)
    """
    POINT_DENSITY = 1000  # how many points to sample per model

    # basis fit
    model = stats.invgauss(mu=_model1[0], loc=_model1[1], scale=_model1[2])
    # NOTE(review): the basis grid starts at the 0.010 quantile while the
    # test grids below start at 0.001 - confirm whether this is intended.
    x = np.linspace(model.ppf(0.010), model.ppf(0.999), POINT_DENSITY)
    p1 = model.pdf(x)
    avgP = p1

    # test fits
    n_tests = _model2.shape[0]
    p2 = np.zeros((n_tests, POINT_DENSITY))
    for i in range(n_tests):
        model = stats.invgauss(mu=_model2[i, 0], loc=_model2[i, 1],
                               scale=_model2[i, 2])
        x = np.linspace(model.ppf(0.001), model.ppf(0.999), POINT_DENSITY)
        p2[i, :] = model.pdf(x)
        avgP = avgP + p2[i, :]

    # average density over the basis model and all test models
    avgP = avgP / (1 + n_tests)

    gJS = np.sum(p1 * np.log(np.divide(p1, avgP)))
    for i in range(n_tests):
        # Weight each log-ratio by its own row only; multiplying by the whole
        # p2 matrix would mix every test model into every term.
        gJS = gJS + np.sum(p2[i, :] * np.log(np.divide(p2[i, :], avgP)))
    gJS = gJS / (1 + n_tests)

    return math.exp(-gJS / POINT_DENSITY)
def test_invgauss_pdf():
    """Check that exp(likelihoods.invgauss_logpdf) matches scipy's invgauss pdf.

    Runs ten trials, each with freshly drawn random a, v and sigma.
    """
    trials = 10
    for _trial in range(trials):
        a = np.random.rand() * 10.
        v = np.random.rand() * 10.
        sigma = np.random.rand() * 10.
        mu = a / v
        lam = a**2 / sigma**2
        x = np.linspace(0.1, 100, 1000)

        y1 = np.exp(
            likelihoods.invgauss_logpdf(x, 0, a, v, sigma=sigma,
                                        p_outlier=0.))

        # needs transform: https://github.com/scipy/scipy/issues/2367#issuecomment-17028905
        scipy_dist = stats.invgauss(mu / lam, loc=0, scale=lam)
        y2 = scipy_dist.pdf(x)

        np.testing.assert_array_almost_equal(y1, y2)
def _ppf(self, q, p, mu, phi):
    """Return the quantile for each probability in ``q``.

    ``p``, ``mu`` and ``phi`` are broadcast to ``q.shape`` and every element
    is dispatched on its variance power ``p``:

    * p == 0: normal with mean mu and variance phi
    * p == 1: Poisson with rate mu / phi
    * 1 < p < 2: 0 where q does not exceed the cdf at zero, otherwise the
      value returned by ``self._ppf_single1to2``
    * p == 2: gamma with shape 1 / phi and scale phi * mu
    * p == 3: inverse Gaussian with shape mu * phi and scale 1 / phi

    Elements whose ``p`` matches none of these cases are returned as 0.0.

    Parameters
    ----------
    q : numpy.ndarray
        Probabilities at which to evaluate the quantile function.
    p, mu, phi : array_like
        Variance power, mean and scale; broadcastable to ``q.shape``.

    Returns
    -------
    numpy.ndarray of float with the shape of ``q``.
    """
    p = np.broadcast_to(p, q.shape)
    mu = np.broadcast_to(mu, q.shape)
    phi = np.broadcast_to(phi, q.shape)
    ppf = np.zeros(q.shape, dtype=float)

    # Gaussian
    mask = p == 0
    if mask.any():
        ppf[mask] = norm(loc=mu[mask],
                         scale=np.sqrt(phi[mask])).ppf(q[mask])

    # Poisson
    mask = p == 1
    if mask.any():
        ppf[mask] = poisson(mu=mu[mask] / phi[mask]).ppf(q[mask])

    # 1 < p < 2
    in_1to2 = (1 < p) & (p < 2)
    if in_1to2.any():
        zeros = np.zeros_like(ppf)
        # cdf evaluated at zero for the affected elements
        zero_mass = np.zeros_like(ppf)
        zero_mass[in_1to2] = self._cdf(zeros[in_1to2], p[in_1to2],
                                       mu[in_1to2], phi[in_1to2])
        # forwarded to _ppf_single1to2; presumably an upper search bound
        right = 10 * mu * phi**p
        above_zero_mass = q > zero_mass

        at_zero = in_1to2 & ~above_zero_mass
        if at_zero.any():
            ppf[at_zero] = zeros[at_zero]

        positive = in_1to2 & above_zero_mass
        if positive.any():
            # Build the element-wise wrapper only when it is actually needed.
            single1to2v = np.vectorize(self._ppf_single1to2, otypes='d')
            ppf[positive] = single1to2v(q[positive], p[positive],
                                        mu[positive], phi[positive],
                                        zero_mass[positive], right[positive])

    # Gamma
    mask = p == 2
    if mask.any():
        ppf[mask] = gamma(a=1 / phi[mask],
                          scale=phi[mask] * mu[mask]).ppf(q[mask])

    # Inverse Gaussian
    mask = p == 3
    if mask.any():
        ppf[mask] = invgauss(mu=mu[mask] * phi[mask],
                             scale=1 / phi[mask]).ppf(q[mask])

    return ppf
def test_invgauss_sampling():
    """KS-test samples from likelihoods.invgauss against scipy's invgauss cdf.

    The random pool is initialised once with shape (5000, 3) and seed 101;
    ten random parameter sets are then checked at the 5% level.
    """
    n_samples = 5000
    likelihoods.init_rands((n_samples, 3), seed=101)

    for _ in range(10):
        a = np.random.rand()*10.
        v = np.random.rand()*10.
        sigma = np.random.rand()*10.
        mu = a / v
        lam = a**2 / sigma**2

        drawn = likelihoods.invgauss(0, a, v, sigma=sigma)

        # needs transform: https://github.com/scipy/scipy/issues/2367#issuecomment-17028905
        reference = stats.invgauss(mu/lam, loc=0, scale=lam)
        D, p_value = stats.kstest(drawn, reference.cdf)

        assert p_value > .05, "Not from the same distribution. Params: a=%f, v=%f, mu=%f, lam=%f" % (a, v, mu, lam)
def test_invgauss_sampling():
    """Check that likelihoods.invgauss draws follow scipy's invgauss law.

    Uses a Kolmogorov-Smirnov test (p-value must exceed 0.05) for ten
    randomly drawn (a, v, sigma) parameter sets.
    """
    pool_rows = 5000
    likelihoods.init_rands((pool_rows, 3), seed=101)

    trials = 10
    for _trial in range(trials):
        a = np.random.rand() * 10.
        v = np.random.rand() * 10.
        sigma = np.random.rand() * 10.
        mu = a / v
        lam = a**2 / sigma**2

        samples = likelihoods.invgauss(0, a, v, sigma=sigma)

        # needs transform: https://github.com/scipy/scipy/issues/2367#issuecomment-17028905
        expected_cdf = stats.invgauss(mu / lam, loc=0, scale=lam).cdf
        D, p_value = stats.kstest(samples, expected_cdf)

        assert p_value > .05, "Not from the same distribution. Params: a=%f, v=%f, mu=%f, lam=%f" % (
            a, v, mu, lam)
def all_dists():
    """Return a dict mapping distribution names to frozen scipy.stats objects.

    Shape parameters were taken from the examples in the official
    scipy.stats documentation; every distribution uses loc=0.0 and scale=1.0.
    The original list had 89 entries; the two frechet entries are disabled,
    leaving 87 in the returned dict.

    The key ``"gilbrat"`` is kept for backward compatibility. Its value is
    built from ``stats.gibrat`` when that name exists and from the older
    misspelled ``stats.gilbrat`` otherwise, so the function works on SciPy
    releases that provide only one of the two names.
    """
    gibrat = getattr(stats, "gibrat", None)
    if gibrat is None:
        gibrat = stats.gilbrat

    return {
        "alpha": stats.alpha(a=3.57, loc=0.0, scale=1.0),
        "anglit": stats.anglit(loc=0.0, scale=1.0),
        "arcsine": stats.arcsine(loc=0.0, scale=1.0),
        "beta": stats.beta(a=2.31, b=0.627, loc=0.0, scale=1.0),
        "betaprime": stats.betaprime(a=5, b=6, loc=0.0, scale=1.0),
        "bradford": stats.bradford(c=0.299, loc=0.0, scale=1.0),
        "burr": stats.burr(c=10.5, d=4.3, loc=0.0, scale=1.0),
        "cauchy": stats.cauchy(loc=0.0, scale=1.0),
        "chi": stats.chi(df=78, loc=0.0, scale=1.0),
        "chi2": stats.chi2(df=55, loc=0.0, scale=1.0),
        "cosine": stats.cosine(loc=0.0, scale=1.0),
        "dgamma": stats.dgamma(a=1.1, loc=0.0, scale=1.0),
        "dweibull": stats.dweibull(c=2.07, loc=0.0, scale=1.0),
        "erlang": stats.erlang(a=2, loc=0.0, scale=1.0),
        "expon": stats.expon(loc=0.0, scale=1.0),
        "exponnorm": stats.exponnorm(K=1.5, loc=0.0, scale=1.0),
        "exponweib": stats.exponweib(a=2.89, c=1.95, loc=0.0, scale=1.0),
        "exponpow": stats.exponpow(b=2.7, loc=0.0, scale=1.0),
        "f": stats.f(dfn=29, dfd=18, loc=0.0, scale=1.0),
        "fatiguelife": stats.fatiguelife(c=29, loc=0.0, scale=1.0),
        "fisk": stats.fisk(c=3.09, loc=0.0, scale=1.0),
        "foldcauchy": stats.foldcauchy(c=4.72, loc=0.0, scale=1.0),
        "foldnorm": stats.foldnorm(c=1.95, loc=0.0, scale=1.0),
        # Disabled in the original list (presumably unavailable in the
        # targeted SciPy release - confirm before re-enabling):
        # "frechet_r": stats.frechet_r(c=1.89, loc=0.0, scale=1.0),
        # "frechet_l": stats.frechet_l(c=3.63, loc=0.0, scale=1.0),
        "genlogistic": stats.genlogistic(c=0.412, loc=0.0, scale=1.0),
        "genpareto": stats.genpareto(c=0.1, loc=0.0, scale=1.0),
        "gennorm": stats.gennorm(beta=1.3, loc=0.0, scale=1.0),
        "genexpon": stats.genexpon(a=9.13, b=16.2, c=3.28, loc=0.0,
                                   scale=1.0),
        "genextreme": stats.genextreme(c=-0.1, loc=0.0, scale=1.0),
        "gausshyper": stats.gausshyper(a=13.8, b=3.12, c=2.51, z=5.18,
                                       loc=0.0, scale=1.0),
        "gamma": stats.gamma(a=1.99, loc=0.0, scale=1.0),
        "gengamma": stats.gengamma(a=4.42, c=-3.12, loc=0.0, scale=1.0),
        "genhalflogistic": stats.genhalflogistic(c=0.773, loc=0.0,
                                                 scale=1.0),
        "gilbrat": gibrat(loc=0.0, scale=1.0),
        "gompertz": stats.gompertz(c=0.947, loc=0.0, scale=1.0),
        "gumbel_r": stats.gumbel_r(loc=0.0, scale=1.0),
        "gumbel_l": stats.gumbel_l(loc=0.0, scale=1.0),
        "halfcauchy": stats.halfcauchy(loc=0.0, scale=1.0),
        "halflogistic": stats.halflogistic(loc=0.0, scale=1.0),
        "halfnorm": stats.halfnorm(loc=0.0, scale=1.0),
        "halfgennorm": stats.halfgennorm(beta=0.675, loc=0.0, scale=1.0),
        "hypsecant": stats.hypsecant(loc=0.0, scale=1.0),
        "invgamma": stats.invgamma(a=4.07, loc=0.0, scale=1.0),
        "invgauss": stats.invgauss(mu=0.145, loc=0.0, scale=1.0),
        "invweibull": stats.invweibull(c=10.6, loc=0.0, scale=1.0),
        "johnsonsb": stats.johnsonsb(a=4.32, b=3.18, loc=0.0, scale=1.0),
        "johnsonsu": stats.johnsonsu(a=2.55, b=2.25, loc=0.0, scale=1.0),
        "ksone": stats.ksone(n=1e03, loc=0.0, scale=1.0),
        "kstwobign": stats.kstwobign(loc=0.0, scale=1.0),
        "laplace": stats.laplace(loc=0.0, scale=1.0),
        "levy": stats.levy(loc=0.0, scale=1.0),
        "levy_l": stats.levy_l(loc=0.0, scale=1.0),
        "levy_stable": stats.levy_stable(alpha=0.357, beta=-0.675, loc=0.0,
                                         scale=1.0),
        "logistic": stats.logistic(loc=0.0, scale=1.0),
        "loggamma": stats.loggamma(c=0.414, loc=0.0, scale=1.0),
        "loglaplace": stats.loglaplace(c=3.25, loc=0.0, scale=1.0),
        "lognorm": stats.lognorm(s=0.954, loc=0.0, scale=1.0),
        "lomax": stats.lomax(c=1.88, loc=0.0, scale=1.0),
        "maxwell": stats.maxwell(loc=0.0, scale=1.0),
        "mielke": stats.mielke(k=10.4, s=3.6, loc=0.0, scale=1.0),
        "nakagami": stats.nakagami(nu=4.97, loc=0.0, scale=1.0),
        "ncx2": stats.ncx2(df=21, nc=1.06, loc=0.0, scale=1.0),
        "ncf": stats.ncf(dfn=27, dfd=27, nc=0.416, loc=0.0, scale=1.0),
        "nct": stats.nct(df=14, nc=0.24, loc=0.0, scale=1.0),
        "norm": stats.norm(loc=0.0, scale=1.0),
        "pareto": stats.pareto(b=2.62, loc=0.0, scale=1.0),
        "pearson3": stats.pearson3(skew=0.1, loc=0.0, scale=1.0),
        "powerlaw": stats.powerlaw(a=1.66, loc=0.0, scale=1.0),
        "powerlognorm": stats.powerlognorm(c=2.14, s=0.446, loc=0.0,
                                           scale=1.0),
        "powernorm": stats.powernorm(c=4.45, loc=0.0, scale=1.0),
        "rdist": stats.rdist(c=0.9, loc=0.0, scale=1.0),
        "reciprocal": stats.reciprocal(a=0.00623, b=1.01, loc=0.0,
                                       scale=1.0),
        "rayleigh": stats.rayleigh(loc=0.0, scale=1.0),
        "rice": stats.rice(b=0.775, loc=0.0, scale=1.0),
        "recipinvgauss": stats.recipinvgauss(mu=0.63, loc=0.0, scale=1.0),
        "semicircular": stats.semicircular(loc=0.0, scale=1.0),
        "t": stats.t(df=2.74, loc=0.0, scale=1.0),
        "triang": stats.triang(c=0.158, loc=0.0, scale=1.0),
        "truncexpon": stats.truncexpon(b=4.69, loc=0.0, scale=1.0),
        "truncnorm": stats.truncnorm(a=0.1, b=2, loc=0.0, scale=1.0),
        "tukeylambda": stats.tukeylambda(lam=3.13, loc=0.0, scale=1.0),
        "uniform": stats.uniform(loc=0.0, scale=1.0),
        "vonmises": stats.vonmises(kappa=3.99, loc=0.0, scale=1.0),
        "vonmises_line": stats.vonmises_line(kappa=3.99, loc=0.0,
                                             scale=1.0),
        "wald": stats.wald(loc=0.0, scale=1.0),
        "weibull_min": stats.weibull_min(c=1.79, loc=0.0, scale=1.0),
        "weibull_max": stats.weibull_max(c=2.87, loc=0.0, scale=1.0),
        "wrapcauchy": stats.wrapcauchy(c=0.0311, loc=0.0, scale=1.0),
    }
# Shape parameter of the inverse Gaussian used throughout this example.
mu = 0.145

# First four moments: mean, variance, skewness and kurtosis.
mean, var, skew, kurt = invgauss.stats(mu, moments='mvsk')

# Plot the probability density function (``pdf``) on 100 points between the
# 1% and 99% quantiles.
lower_quantile = invgauss.ppf(0.01, mu)
upper_quantile = invgauss.ppf(0.99, mu)
x = np.linspace(lower_quantile, upper_quantile, 100)
ax.plot(x, invgauss.pdf(x, mu),
        'r-', lw=5, alpha=0.6, label='invgauss pdf')

# Calling the distribution object fixes ("freezes") its shape, location and
# scale parameters; plot the frozen ``pdf`` over the same grid.
rv = invgauss(mu)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Round-trip check of ``cdf`` and ``ppf``; the result is not stored and is
# expected to be True.
probabilities = [0.001, 0.5, 0.999]
vals = invgauss.ppf(probabilities, mu)
np.allclose(probabilities, invgauss.cdf(vals, mu))

# Draw 1000 random variates and overlay their normalised histogram.
r = invgauss.rvs(mu, size=1000)
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
# Print the largest absolute difference between ``logabsderiv`` and
# ``np.log(-back(1))``; both names are defined outside this fragment.
print(abs(logabsderiv - np.log(-back(1))).max())

# Plotting demo, permanently disabled by the ``if False`` guard.
if False:
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(1, 1)
    from scipy import stats

    # Earlier variant kept for reference: pdf plotted against log(d').
    # y = np.linspace(-5,EER2logdprime(0.000001),200)
    # pdf_x = stats.beta(1,1,scale=0.5).pdf #uniform between 0 and 0.5
    # y2x = logdprime2EER
    # pdf_y = reparam_pdf(pdf_x,y2x,y)
    # ax.plot(y,pdf_y,label="log(d')")

    # Earlier variant kept for reference: pdf plotted against d'.
    # y = np.linspace(np.exp(-5),np.exp(EER2logdprime(0.000001)),200)
    # pdf_x = stats.beta(1,4,scale=0.5).pdf #uniform between 0 and 0.5
    # y2x = dprime2EER
    # pdf_y = reparam_pdf(pdf_x,y2x,y)
    # ax.plot(y,pdf_y,label="d'")

    # Active variant: 200 points on [0, 0.49], with an inverse Gaussian
    # (mu=1, scale=1) pdf passed through reparam_pdf using EER2dprime.
    y = np.linspace(0, 0.49, 200)
    pdf_x = stats.invgauss(mu=1, scale=1).pdf
    y2x = EER2dprime
    pdf_y = reparam_pdf(pdf_x, y2x, y)
    ax.plot(y, pdf_y, label="EER")

    ax.legend(loc='best', frameon=False)
    plt.xlabel("EER")
    plt.grid()
    plt.show()
from scipy.stats import invgauss
import matplotlib.pyplot as plt
import numpy as np

# invgauss.pdf(x, mu) = 1 / sqrt(2*pi*x**3) * exp(-(x-mu)**2/(2*x*mu**2))
fig, ax = plt.subplots(1, 1)
mu = 1

# The plotting range runs from the 1% to the 99% quantile, so the bounds must
# come from the quantile function (ppf), not from density values.
x = np.linspace(invgauss.ppf(0.01, mu), invgauss.ppf(0.99, mu), 100)
ax.plot(x, invgauss.pdf(x, mu), 'r-', lw=5, alpha=0.6, label='invgauss pdf')

# Same curve drawn from a frozen distribution object.
rv = invgauss(mu)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Histogram of 1000 random draws, normalised to a density. ``density=True``
# replaces the ``normed`` keyword, which current Matplotlib no longer accepts.
r = invgauss.rvs(mu, size=1000)
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)

ax.legend(loc='best', frameon=False)
plt.show()
def _scipy_invgauss(loc, concentration):
    """Build the frozen scipy inverse Gaussian used for expected outputs.

    scipy parameterizes the distribution differently
    (see https://github.com/scipy/scipy/issues/4654): its shape is
    ``loc / concentration`` and its scale is ``concentration``.
    """
    scipy_shape = loc/concentration
    return stats.invgauss(mu=scipy_shape, scale=concentration)
def estimate_tweedie_loglike_series(x, mu, phi, p):
    """Estimate the log-likelihood for each element of x, mu, phi, and p.

    All four inputs are converted to arrays of at least one dimension and
    are indexed with the same boolean masks, so they must share one shape.

    Parameters
    ----------
    x : array
        The observed values. Must be non-negative.
    mu : array
        The fitted values. Must be positive.
    phi : array
        The scale parameter. Must be positive.
    p : array
        The Tweedie variance power. Must equal 0 or must be greater than
        or equal to 1.

    Returns
    -------
    ll : numpy.ndarray
        The log-likelihood of every observation. Elements not covered by
        any branch below (including x == 0 for p > 2) are -inf.
    """
    x = np.array(x, ndmin=1)
    mu = np.array(mu, ndmin=1)
    phi = np.array(phi, ndmin=1)
    p = np.array(p, ndmin=1)

    ll = np.ones_like(x) * -np.inf

    # Gaussian (Normal)
    gaussian_mask = p == 0.
    if gaussian_mask.any():
        ll[gaussian_mask] = norm(
            loc=mu[gaussian_mask],
            scale=np.sqrt(phi[gaussian_mask])).logpdf(x[gaussian_mask])

    # Poisson
    poisson_mask = p == 1.
    if poisson_mask.any():
        poisson_pdf = poisson(
            mu=mu[poisson_mask] / phi[poisson_mask]).pmf(
            x[poisson_mask] / phi[poisson_mask]) / phi[poisson_mask]
        ll[poisson_mask] = np.log(poisson_pdf)

    # 1 < p < 2
    ll_1to_2_mask = (1 < p) & (p < 2)
    if ll_1to_2_mask.any():
        # Calculating log-likelihood at x == 0 is pretty straightforward
        zeros = x == 0
        mask = zeros & ll_1to_2_mask
        ll[mask] = -(mu[mask]**(2 - p[mask]) / (phi[mask] * (2 - p[mask])))
        mask = ~zeros & ll_1to_2_mask
        ll[mask] = ll_1to2(x[mask], mu[mask], phi[mask], p[mask])

    # Gamma
    gamma_mask = p == 2
    if gamma_mask.any():
        # Restrict phi and mu to the Gamma elements as well; the unmasked
        # arrays do not line up with x[gamma_mask] when only some elements
        # have p == 2.
        ll[gamma_mask] = gamma(
            a=1 / phi[gamma_mask],
            scale=phi[gamma_mask] * mu[gamma_mask]).logpdf(x[gamma_mask])

    # (2 < p < 3) or (p > 3)
    ll_2plus_mask = ((2 < p) & (p < 3)) | (p > 3)
    if ll_2plus_mask.any():
        zeros = x == 0
        mask = zeros & ll_2plus_mask
        ll[mask] = -np.inf
        mask = ~zeros & ll_2plus_mask
        ll[mask] = ll_2orMore(x[mask], mu[mask], phi[mask], p[mask])

    # Inverse Gaussian (Normal)
    invgauss_mask = p == 3
    if invgauss_mask.any():
        # Only positive observations are evaluated; the rest stay at -inf.
        mask = invgauss_mask & (x > 0)
        ll[mask] = invgauss(mu=mu[mask] * phi[mask],
                            scale=1. / phi[mask]).logpdf(x[mask])

    return ll
def _scipy_invgauss(loc, concentration):
    """Return scipy's frozen invgauss for the given loc and concentration.

    Used to generate expected output. scipy uses a different
    parameterization (https://github.com/scipy/scipy/issues/4654), so the
    arguments are translated to ``mu = loc / concentration`` and
    ``scale = concentration``.
    """
    translated = {"mu": loc / concentration, "scale": concentration}
    return stats.invgauss(**translated)
"""Plot some basic BPT distributions """ import os, sys from matplotlib import pyplot as plt import numpy as np from scipy.stats import expon, gamma, weibull_min, invgauss mu = 100 alphas = [0.5, 1., 2., 5., 10.] #alpha = 1 x_vals = np.arange(0, 4 * mu) # Plot for a range of alpha values for alpha in alphas: bpt = invgauss(alpha, scale=mu) pdf_vals = bpt.pdf(x_vals) plt.plot(x_vals, pdf_vals, label=alpha) # Now add exponential exp_dist = expon(scale=mu) pdf_vals = exp_dist.pdf(x_vals) plt.plot(x_vals, pdf_vals, label='Exponential', color='k') plt.legend() plt.savefig('BPT_distribution.png')