def cdf(x, mu, loc=0, scale=1):
    """
    Cumulative distribution function of the inverse Gaussian distribution.

    The parameters are passed through ``_validate_params`` and the
    argument is standardized as ``z = (x - loc) / scale``.  The result is
    zero whenever ``x <= loc``.  All arithmetic is carried out with five
    extra digits of working precision.
    """
    with mpmath.extradps(5):
        mu, loc, scale = _validate_params(mu, loc, scale)
        xv = mpmath.mpf(x)
        if xv <= loc:
            return mpmath.mp.zero
        z = (xv - loc) / scale
        # The CDF is the sum of two normal-CDF terms; the second one is
        # weighted by exp(2/mu).
        lower = mpmath.ncdf((z / mu - 1) / mpmath.sqrt(z))
        weight = mpmath.exp(2 / mu)
        upper = weight * mpmath.ncdf(-(z / mu + 1) / mpmath.sqrt(z))
        return lower + upper
def logcdf(x, mu, loc=0, scale=1):
    """
    Natural logarithm of the inverse Gaussian CDF.

    Returns ``-inf`` for ``x <= loc``.  Otherwise the logarithms of the
    two terms that make up the CDF are computed separately and combined
    as ``log_a + log1p(exp(log_b - log_a))``, all with five extra digits
    of working precision.
    """
    with mpmath.extradps(5):
        mu, loc, scale = _validate_params(mu, loc, scale)
        xv = mpmath.mpf(x)
        if xv <= loc:
            return -mpmath.mp.inf
        z = (xv - loc) / scale
        # log of the first CDF term.
        log_a = mpmath.log(mpmath.ncdf((z / mu - 1) / mpmath.sqrt(z)))
        # log of the second CDF term, exp(2/mu) * ncdf(...).
        tail = mpmath.ncdf(-(z / mu + 1) / mpmath.sqrt(z))
        log_b = (2 / mu) + mpmath.log(tail)
        # log(e^log_a + e^log_b), factored around log_a.
        return log_a + mpmath.log1p(mpmath.exp(log_b - log_a))
def logsf(x, mu, loc=0, scale=1):
    """
    Natural logarithm of the inverse Gaussian survival function.

    Returns zero (the log of 1) for ``x <= loc``.  Otherwise the survival
    function is written as the difference of two positive terms whose
    logarithms are ``log_a`` and ``log_b``, and the result is
    ``log_a + log1p(-exp(log_b - log_a))``.  Five extra digits of working
    precision are used.
    """
    with mpmath.extradps(5):
        mu, loc, scale = _validate_params(mu, loc, scale)
        xv = mpmath.mpf(x)
        if xv <= loc:
            return mpmath.mp.zero
        z = (xv - loc) / scale
        # log of ncdf(-(z/mu - 1)/sqrt(z)), i.e. one minus the first CDF term.
        log_a = mpmath.log(mpmath.ncdf(-(z / mu - 1) / mpmath.sqrt(z)))
        # log of the second CDF term, exp(2/mu) * ncdf(...).
        tail = mpmath.ncdf(-(z / mu + 1) / mpmath.sqrt(z))
        log_b = 2 / mu + mpmath.log(tail)
        # log(e^log_a - e^log_b), factored around log_a.
        return log_a + mpmath.log1p(-mpmath.exp(log_b - log_a))
def _norm_delta_cdf(a, b):
    """
    Return ``CDF(b) - CDF(a)`` for the standard normal CDF.

    The caller is expected to supply ``a <= b``; this is not checked.
    Five extra digits of working precision are used.
    """
    with mpmath.extradps(5):
        if a == b:
            return mpmath.mp.zero
        if a > 0:
            # Both limits are positive: by symmetry the difference equals
            # CDF(-a) - CDF(-b), evaluated on the negative side.
            first, second = -a, -b
        else:
            first, second = b, a
        return mpmath.ncdf(first) - mpmath.ncdf(second)
def sf(x, mu=0, sigma=1):
    """
    Survival function of the log-normal distribution.

    ``sigma`` is checked by ``_validate_sigma``.  For ``x <= 0`` the
    result is one; otherwise it is the normal CDF evaluated at ``-log(x)``
    with mean ``-mu`` and standard deviation ``sigma``.
    """
    _validate_sigma(sigma)
    if x <= 0:
        survival = mpmath.mp.one
    else:
        log_x = mpmath.log(x)
        # 1 - Phi((log x - mu)/sigma) == Phi((-log x - (-mu))/sigma)
        survival = mpmath.ncdf(-log_x, -mu, sigma)
    return survival
def cdf(x, mu=0, sigma=1):
    """
    Cumulative distribution function of the log-normal distribution.

    ``sigma`` is checked by ``_validate_sigma``.  For ``x <= 0`` the
    result is zero; otherwise it is the normal CDF evaluated at ``log(x)``
    with mean ``mu`` and standard deviation ``sigma``.
    """
    _validate_sigma(sigma)
    if x <= 0:
        probability = mpmath.mp.zero
    else:
        log_x = mpmath.log(x)
        probability = mpmath.ncdf(log_x, mu, sigma)
    return probability
def invsf(p, a, b):
    """
    Inverse of the survival function of the truncated standard normal
    distribution.

    `a` and `b` are the truncation limits (checked by
    ``_validate_params``).  Returns ``nan`` when `p` lies outside
    ``[0, 1]``; otherwise returns the `x` for which the survival function
    of the standard normal truncated to ``[a, b]`` equals `p`.
    """
    _validate_params(a, b)
    if p < 0 or p > 1:
        return mpmath.nan

    with mpmath.extradps(5):
        p = mpmath.mpf(p)
        a = mpmath.mpf(a)
        b = mpmath.mpf(b)

        # SF(x) = (Phi(b) - Phi(x)) / (Phi(b) - Phi(a)) = p, hence
        # Phi(x) = Phi(b) - p*(Phi(b) - Phi(a)).  Invert with the
        # untruncated normal quantile function.
        p2 = -p * _norm_delta_cdf(a, b) + mpmath.ncdf(b)
        x = normal.invcdf(p2)

    return x
def invcdf(p, a, b):
    """
    Quantile function (inverse CDF, percent point function) of the
    truncated standard normal distribution.

    `a` and `b` are checked by ``_validate_params``.  Returns ``nan``
    when `p` lies outside ``[0, 1]``.
    """
    _validate_params(a, b)
    if p < 0 or p > 1:
        return mpmath.nan

    with mpmath.extradps(5):
        p, a, b = (mpmath.mpf(value) for value in (p, a, b))
        # Map p to the corresponding probability of the untruncated
        # normal: Phi(x) = Phi(a) + p*(Phi(b) - Phi(a)).
        untruncated_p = p * _norm_delta_cdf(a, b) + mpmath.ncdf(a)
        return normal.invcdf(untruncated_p)
def mpmath_normal_cdf2(x, y, r):
    """
    Bivariate standard normal CDF at (x, y) with correlation r.

    This function produces correct results for inputs currently present in
    /test/Tests/Data/SpecialFunctionsValues.  Other inputs may fall into
    areas where currently present algorithms produce incorrect results and
    may require modifying this function.

    Prints a diagnostic when the quadrature error estimate is suspiciously
    large relative to the computed integral.
    """

    def corner_mass(u, v):
        # CDF value at correlation -1: zero if u <= -v, else Phi(u) - Phi(-v).
        if u <= -v:
            return mpmath.mpf('0')
        return mpmath.ncdf(u) - mpmath.ncdf(-v)

    # Infinite arguments reduce to zero or to a univariate CDF.
    if x == -mpmath.inf or y == -mpmath.inf:
        return mpmath.mpf('0')
    if x == mpmath.inf:
        return mpmath.ncdf(y)
    if y == mpmath.inf:
        return mpmath.ncdf(x)

    # Perfectly correlated / anti-correlated cases have closed forms.
    if r == mpmath.mpf('1'):
        return mpmath.ncdf(min(x, y))
    if r == mpmath.mpf('-1'):
        return corner_mass(x, y)

    # The CDF is symmetric in (x, y); keep the larger magnitude in x.
    if abs(y) > abs(x):
        x, y = y, x

    if r < 0:
        # phi(x,y,r) = phi(x,inf,r) - phi(x,-y,-r), clamped at zero.
        return max(mpmath.ncdf(x) - mpmath_normal_cdf2(x, -y, -r),
                   mpmath.mpf('0'))

    if x + y > 0:
        # phi(x,y,r) = phi(-x,-y,r) + phi(x,y,-1)
        return mpmath_normal_cdf2(-x, -y, r) + corner_mass(x, y)

    def f(t):
        # Bivariate normal density at (x, y) for correlation t; taken as
        # zero at the endpoints t = +/-1.
        if abs(t) == mpmath.mpf('1'):
            return mpmath.mpf('0')
        omt2 = (1 - t) * (1 + t)
        scale = 1 / (2 * mpmath.pi * mpmath.sqrt(omt2))
        exponent = -(x * x + y * y - 2 * t * x * y) / (2 * omt2)
        return scale * mpmath.exp(exponent)

    # Integrate the density over the correlation from -1 up to r.
    result, err = mpmath.quad(f, [-1, r], error=True)
    if mpmath.mpf('1e50') * abs(err) > abs(result):
        print(f"Suspiciously big error when evaluating an integral for normal_cdf2({x}, {y}, {r}).")
        print(f"Integral: {result}")
        print(f"Integral error estimate: {err}")
    return result
def calculate_pval(self, gene_set: GeneSet, max_pairs: int = None) -> GeneSetDataCorrelation:
    """
    Calculate p-val for a single gene-set. Are genes closer in space than
    expected. Compares gene set similarities to similarities between
    random pairs.

    The p-value is the upper-tail probability of the gene set's summary
    similarity (mean or median, as selected by
    ``self.storage._summary_type``) under a normal distribution centred
    at ``self.storage._center`` with standard deviation
    ``self.storage.get_se(n)``, where ``n`` is the number of similarities.

    :param gene_set: gene set whose ``genes`` are compared
    :param max_pairs: Should number of calculated similarities be limited
    :return: data with gene set pointer and pval, median and mean of
        similarity, plus the result of ``retain_most_similar`` for 10 items
    :raises ValueError: if the storage summary type is neither MEAN nor
        MEDIAN. Exceptions raised by ``self.calculator.similarities``
        propagate unchanged.
    """
    set_similarities_data = self.calculator.similarities(
        gene_set.genes, max_n_similarities=max_pairs, as_list=False)
    set_similarities = list(set_similarities_data.values())
    mean_set = mean_list(set_similarities)
    median_set = median(set_similarities)
    n = len(set_similarities)

    summary_type = self.storage._summary_type
    if summary_type == MEAN:
        center_set = mean_set
    elif summary_type == MEDIAN:
        # The median summary must be compared using the median, not the mean.
        center_set = median_set
    else:
        raise ValueError('Possible summary types are', MEAN, 'and', MEDIAN)

    se = self.storage.get_se(n)
    center_random = self.storage._center
    # Upper tail 1 - Phi((c - m)/se) computed as Phi((m - c)/se) so that
    # very small p-values do not cancel to zero.
    p = float(mpmath.ncdf(-center_set, mu=-center_random, sigma=se))

    gene_set_data = GeneSetDataCorrelation(gene_set)
    gene_set_data.mean = mean_set
    gene_set_data.median = median_set
    gene_set_data.pval = p
    gene_set_data.most_similar = self.retain_most_similar(
        set_similarities_data, 10)
    return gene_set_data
def odds_ratio(table, kind='conditional', alternative='two-sided'):
    r"""
    Compute the odds ratio for a 2x2 contingency table.

    Parameters
    ----------
    table : array_like of ints
        A 2x2 contingency table.  Elements must be non-negative integers.
    kind : str, optional
        Which odds ratio to compute: the sample odds ratio
        (``kind='sample'``) or the conditional odds ratio
        (``kind='conditional'``).  Default is ``'conditional'``.
    alternative : {'two-sided', 'less', 'greater'}, optional
        Defines the alternative hypothesis (default is 'two-sided'):

        * 'two-sided'
        * 'less': one-sided
        * 'greater': one-sided

    Returns
    -------
    result : `OddsRatioResult` instance
        The returned object has two computed attributes:

        odds_ratio : mpmath.mpf
            * If `kind` is ``'sample'``, this is
              ``table[0, 0]*table[1, 1]/(table[0, 1]*table[1, 0])``.
              This is the prior odds ratio and not a posterior estimate.
            * If `kind` is ``'conditional'``, this is the conditional
              maximum likelihood estimate for the odds ratio.  It is the
              noncentrality parameter of Fisher's noncentral hypergeometric
              distribution with the same hypergeometric parameters as
              `table` and whose mean is ``table[0, 0]``.
        pvalue : fractions.Fraction or mpmath.mpf
            The p-value associated with the computed odds ratio.

            * If `kind` is ``'sample'``, the p-value is based on the
              normal approximation to the distribution of the log of the
              sample odds ratio.
            * If `kind` is ``'conditional'``, the p-value is computed by
              `mpsci.stats.fisher_exact`.

        When a whole row or column of `table` is zero, ``odds_ratio`` is
        NaN and ``pvalue`` is 1.

        The object also stores the input arguments `table`, `kind` and
        `alternative` as attributes, and has the method `odds_ratio_ci`
        that computes the confidence interval of the odds ratio.

    Raises
    ------
    ValueError
        If `kind` or `alternative` is not one of the accepted strings, if
        `table` is not shaped like a 2x2 array, or if it contains a
        negative value.

    References
    ----------
    .. [1] J. Cornfield (1956), A statistical problem arising from
           retrospective studies. In Neyman, J. (ed.), Proceedings of
           the Third Berkeley Symposium on Mathematical Statistics and
           Probability 4, pp. 135-148.
    .. [2] H. Sahai and A. Khurshid (1996), Statistics in Epidemiology:
           Methods, Techniques, and Applications, CRC Press LLC, Boca
           Raton, Florida.
    """
    if kind not in ('conditional', 'sample'):
        raise ValueError("kind must be 'conditional' or 'sample'.")
    if alternative not in ('two-sided', 'less', 'greater'):
        raise ValueError("alternative must be 'two-sided', 'less' or "
                         "'greater'.")
    if len(table) != 2 or any(len(table[i]) != 2 for i in (0, 1)):
        raise ValueError("The input `table` must be shaped like a 2x2 array.")

    a, b, c, d = _unpack_table_to_mpf(table)
    if any(cell < 0 for cell in (a, b, c, d)):
        raise ValueError("All values in `table` must be nonnegative.")

    if _row_or_column_zero(table):
        # If both values in a row or column are zero, the p-value is 1 and
        # the odds ratio is NaN.
        return OddsRatioResult(table=table, kind=kind,
                               alternative=alternative,
                               odds_ratio=mpmath.nan, pvalue=1)

    if kind == 'conditional':
        oddsratio = _conditional_oddsratio(table)
        # We can use fisher_exact to compute the p-value.
        pvalue = fisher_exact(table, alternative=alternative)[1]
    else:
        # kind is 'sample': normal approximation for the log odds ratio.
        oddsratio = _sample_odds_ratio(table)
        log_or = mpmath.log(oddsratio)
        se = mpmath.sqrt(1/a + 1/b + 1/c + 1/d)
        if alternative == 'less':
            pvalue = mpmath.ncdf(log_or/se)
        elif alternative == 'greater':
            pvalue = mpmath.ncdf(-log_or/se)
        else:
            pvalue = 2*mpmath.ncdf(-abs(log_or)/se)

    return OddsRatioResult(table=table, kind=kind, alternative=alternative,
                           odds_ratio=oddsratio, pvalue=pvalue)
def sf(x, mu=0, sigma=1):
    """
    Normal distribution survival function.

    Returns ``1 - CDF(x)`` for the normal distribution with mean `mu` and
    standard deviation `sigma`.
    """
    # 1 - Phi((x - mu)/sigma) == Phi((mu - x)/sigma), which is the normal
    # CDF at -x with mean -mu.  Both x and mu must be negated; negating
    # only x is correct solely for mu == 0.
    return mpmath.ncdf(-x, -mu, sigma)
def standardNormalCDF(z):
    '''
    Evaluate the cumulative distribution function of the standard normal
    distribution at z, using mpmath.ncdf.
    '''
    probability = mpmath.ncdf(z)
    return probability
def _psi(chi):
    """Return ``ncdf(chi) - chi * npdf(chi) - 0.5`` using mpmath."""
    cdf_term = mpmath.ncdf(chi)
    pdf_term = chi * mpmath.npdf(chi)
    return cdf_term - pdf_term - mpmath.mpf('0.5')
def f48(x):
    # erf_Q: upper-tail probability of the standard normal, 1 - ncdf(x).
    # Evaluated as ncdf(-x) (same value by symmetry) so that the result
    # does not cancel to zero for large x.
    return mpmath.ncdf(-x)
def f49(x):
    # hazard: standard normal density divided by the survival function.
    # The survival function 1 - ncdf(x) is evaluated as ncdf(-x) so the
    # denominator does not round to zero for large x.
    return mpmath.npdf(x) / mpmath.ncdf(-x)
def cdf(x, mu=0, sigma=1):
    """
    Cumulative distribution function of the normal distribution with
    mean `mu` and standard deviation `sigma`.
    """
    # Thin wrapper kept for API consistency; mpmath.ncdf does all the work.
    return mpmath.ncdf(x, mu=mu, sigma=sigma)