def _bradford_fitstart(self, data, fitstart):  # pab
    loc = data.min() - 1e-4
    scale = (data - loc).max()
    m = np.mean((data - loc) / scale)
    fun = lambda c: (c - sc.log1p(c)) / (c * sc.log1p(c)) - m
    res = optimize.root(fun, 0.3)
    c = res.x
    return c, loc, scale
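
# Small check (added, not part of the original) of the moment identity the starter
# above inverts: for a Bradford law with shape c, the mean of (x - loc) / scale is
# (c - log1p(c)) / (c * log1p(c)), so matching the sample mean m recovers c.
import numpy as np
from scipy import stats

c = 1.5
analytic_mean = (c - np.log1p(c)) / (c * np.log1p(c))
assert np.isclose(stats.bradford.mean(c), analytic_mean)
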
def _logpdf(self, x, df, mean, prec_U, log_det_cov, rank):
    """
    Parameters
    ----------
    x : ndarray
        Points at which to evaluate the log of the probability density
        function.
    df : float
        Degrees of freedom of the distribution.
    mean : ndarray
        Mean of the distribution.
    prec_U : ndarray
        A decomposition such that np.dot(prec_U, prec_U.T) is the precision
        matrix, i.e. the inverse of the covariance matrix.
    log_det_cov : float
        Logarithm of the determinant of the covariance matrix.
    rank : int
        Rank of the covariance matrix.

    Notes
    -----
    As this function does no argument checking, it should not be called
    directly; use 'logpdf' instead.
    """
    dev = x - mean
    maha = log1p(np.sum(np.square(np.dot(dev, prec_U)), axis=-1) / df)
    gams = gammaln(0.5 * (df + rank)) - gammaln(0.5 * df)
    return gams - 0.5 * (rank * (np.log(df) + _LOG_PI) + log_det_cov
                         + (df + rank) * maha)
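
# Hedged sketch (not from the original) of how prec_U, log_det_cov and rank might be
# derived from a full-rank covariance matrix before calling a _logpdf like the one
# above; the matrix `cov` is a made-up example.
import numpy as np

cov = np.array([[2.0, 0.3], [0.3, 1.0]])
prec_U = np.linalg.cholesky(np.linalg.inv(cov))   # prec_U @ prec_U.T == inv(cov)
log_det_cov = np.linalg.slogdet(cov)[1]           # log |cov|
rank = cov.shape[0]                               # full rank assumed
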
def run(self, data):
    data_norm = cell_normalize(data)
    if sparse.issparse(data_norm):
        data_norm = data_norm.log1p()
    else:
        data_norm = log1p(data_norm)
    W = self.nmf.fit_transform(data_norm)
    H = self.nmf.components_
    if sparse.issparse(data_norm):
        cost = 0
        #ws = sparse.csr_matrix(W)
        #hs = sparse.csr_matrix(H)
        #cost = 0.5*((data_norm - ws.dot(hs)).power(2)).sum()
    else:
        cost = 0.5 * ((data_norm - W.dot(H))**2).sum()
    if 'normalize_h' in self.params:
        print('normalize h')
        H = H / H.sum(0)
    output = []
    if self.return_h:
        output.append(H)
    if self.return_w:
        output.append(W)
    if self.return_wh:
        output.append(W.dot(H))
    if self.return_mds:
        X = dim_reduce(W, H, 2)
        output.append(X.T.dot(H))
    return output, cost
def run(self, data):
    if sparse.issparse(data):
        data = data.toarray()
    if self.use_log:
        data = log1p(data)
    data_pca = self.pca.fit_transform(data.T)
    labels = self.km.fit_predict(data_pca)
    return labels
def softplus(z):
    """Numerically stable version of log(1 + exp(z))."""
    # see stabilizing softplus: http://sachinashanbhag.blogspot.com/2014/05/numerically-approximation-of-log-1-expy.html  # noqa
    mu = z.copy()
    mu[z > 35] = z[z > 35]
    mu[z < -10] = np.exp(z[z < -10])
    mu[(z >= -10) & (z <= 35)] = log1p(np.exp(z[(z >= -10) & (z <= 35)]))
    return mu
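
# Quick demonstration (added; assumes log1p here is numpy's np.log1p) of why the
# piecewise form above matters: naive log1p(exp(z)) overflows for large z, while
# softplus simply returns z in that regime.
import numpy as np

z = np.array([-50.0, 0.0, 50.0, 1000.0])
print(softplus(z))            # ~[2e-22, 0.693, 50.0, 1000.0], no overflow
# np.log1p(np.exp(1000.0))    # would overflow to inf with a RuntimeWarning
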
def get_better_dicts(clean, spam):
    only_clean_json = {}
    only_spam_json = {}
    all_keys = (clean | spam).keys()
    # max_value = max(max([clean[key] for key in clean]), max(spam[key] for key in spam))
    for key in all_keys:
        if len(key) <= 2:
            continue
        if key not in spam:
            only_clean_json[key] = log1p(clean[key])
        elif key not in clean:
            only_spam_json[key] = log1p(spam[key])
        # else:
        #     if clean[key] > spam[key]:
        #         only_clean_json[key] = expit((clean[key] - spam[key])/10)
        #     elif spam[key] > clean[key]:
        #         only_spam_json[key] = expit((spam[key] - clean[key])/10)
    return only_clean_json, only_spam_json
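
# Toy call (the dictionaries are made up, not from the original) showing the
# behaviour of get_better_dicts: keys of length <= 2 and keys present in both
# dictionaries are skipped; the surviving counts are damped with log1p.
clean_counts = {'meeting': 12, 'report': 5, 'ok': 3}
spam_counts = {'winner': 9, 'lottery': 20, 'ok': 8}
only_clean, only_spam = get_better_dicts(clean_counts, spam_counts)
print(only_clean)   # {'meeting': log1p(12), 'report': log1p(5)}
print(only_spam)    # {'winner': log1p(9), 'lottery': log1p(20)}
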
def run(self, data):
    if sparse.issparse(data):
        data = data.toarray()
    if self.use_log:
        data = log1p(data)
    if self.use_exp:
        data = (10**data) - 1
    data_tsne = self.tsne.fit_transform(data.T)
    labels = self.km.fit_predict(data_tsne)
    return labels
def run(self, data):
    data_norm = cell_normalize(data)
    if sparse.issparse(data_norm):
        data_norm = data_norm.log1p()
    else:
        data_norm = log1p(data_norm)
    W, H = nmf_tsne(data_norm, **self.params)
    if sparse.issparse(data_norm):
        ws = sparse.csr_matrix(W)
        hs = sparse.csr_matrix(H)
        cost = 0.5 * ((data_norm - ws.dot(hs)).power(2)).sum()
    else:
        cost = 0.5 * ((data_norm - W.dot(H))**2).sum()
    return [H, W.dot(H)], cost
def _stats(self, p):
    r = special.log1p(-p)
    mu = p / (p - 1.0) / r
    mu2p = -p / r / (p - 1.0)**2
    var = mu2p - mu * mu
    mu3p = -p / r * (1.0 + p) / (1.0 - p)**3
    mu3 = mu3p - 3 * mu * mu2p + 2 * mu**3
    g1 = mu3 / np.power(var, 1.5)
    mu4p = -p / r * (1.0 / (p - 1)**2 - 6 * p / (p - 1)**3 + 6 * p * p / (p - 1)**4)
    mu4 = mu4p - 4 * mu3p * mu + 6 * mu2p * mu * mu - 3 * mu**4
    g2 = mu4 / var**2 - 3.0
    return mu, var, g1, g2
def neg_loglikelihood(beta, Y, X):
    """
    Loss function of the logistic regression (negative log-likelihood).

    Parameters
    ----------
    beta : numpy array
        Parameters of the logistic regression.
    Y : numpy array
        Response variable vector.
    X : numpy matrix
        Matrix of covariates.

    Returns
    -------
    float
        Value of the loss function.
    """
    # sum, ignoring NAs; log1p(1 + expm1(z)) evaluates log(1 + exp(z))
    return -np.nansum(Y * np.matmul(X, beta)
                      - scisp.log1p(1 + scisp.expm1(np.matmul(X, beta))))
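
# Hedged usage sketch (toy data; `scisp` assumed to be scipy.special; not part of
# the original): fitting the logistic regression by minimizing the loss above.
import numpy as np
import scipy.special as scisp
from scipy.optimize import minimize

rng = np.random.default_rng(0)
X = np.column_stack([np.ones(200), rng.normal(size=(200, 2))])
true_beta = np.array([0.5, -1.0, 2.0])
Y = (rng.random(200) < scisp.expit(X @ true_beta)).astype(float)

fit = minimize(neg_loglikelihood, x0=np.zeros(3), args=(Y, X))
print(fit.x)   # should be roughly close to true_beta
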
def log_likelihood(t):
    if not inbounds(t):
        return -np.inf
    p = t[0]
    mu = t[1:]
    n = 10
    rv0 = scipy.stats.poisson(mu[0])
    rv1 = scipy.stats.poisson(mu[1])
    #a = p * rv0.pmf(deaths)
    #b = (1 - p) * rv1.pmf(deaths)
    #try:
    #return freqs.dot(np.log(a+b))
    #except RuntimeWarning as e:
    #print(p, mu)
    #print(a+b)
    #raise
    ll = 0
    for i in range(n):
        loga = np.log(p) + rv0.logpmf(deaths[i])
        logb = log1p(-p) + rv1.logpmf(deaths[i])
        ll += freqs[i] * logsumexp([loga, logb])
    return ll
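
# Small illustration (added, not from the original) of the log-sum-exp step used
# above: combining the two mixture components in log space avoids the underflow
# that a direct p*pmf + (1-p)*pmf sum would hit for extreme counts.
import numpy as np
from scipy.special import logsumexp

loga, logb = -750.0, -760.0           # logs of two tiny mixture terms
print(np.exp(loga) + np.exp(logb))    # 0.0 -- both underflow to zero
print(logsumexp([loga, logb]))        # ~ -749.99995, still finite and accurate
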
def _ppf(self, f, c, q):
    return (np.log(q) - special.log1p(-f))**(1. / c)
def eval(self, x):
    return log1p(np.exp(-x))
def clog1p(x, y):
    z = log1p(x + 1j*y)
    return z.real, z.imag
def _ppf(self, f):
    return sqrt(-2 * special.log1p(-f))
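
# Sanity check (added; not in the original): the expression above is the
# percent-point function of the standard Rayleigh distribution, whose CDF is
# 1 - exp(-x**2 / 2), so applying the CDF to the ppf should give q back.
import numpy as np

q = np.linspace(0.01, 0.99, 5)
x = np.sqrt(-2 * np.log1p(-q))
assert np.allclose(1 - np.exp(-x**2 / 2), q)
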
def _pmf(self, k, p):
    # logser.pmf(k) = - p**k / (k*log(1-p))
    return -np.power(p, k) * 1.0 / k / special.log1p(-p)
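
# Hedged check (added, not part of the original) that the log-series pmf above is a
# proper distribution: for 0 < p < 1 the terms -p**k / (k * log(1 - p)) sum to 1
# over k = 1, 2, ...; scipy.stats.logser exposes the same pmf (and the moments
# computed in the _stats method above).
import numpy as np
from scipy import stats

p = 0.3
k = np.arange(1, 200)
pmf = -np.power(p, k) / (k * np.log1p(-p))
assert np.isclose(pmf.sum(), 1.0)
assert np.allclose(pmf, stats.logser.pmf(k, p))
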
print(numpy.log2(1024))
print(numpy.log10(0))
print(numpy.log(XXX))  # this is the natural log (ln)
print(numpy.exp(1))
print(numpy.e, numpy.pi)

# Mathematical functions
# (not clear why the module is named "special")
import scipy.special as S
print(S.log1p(1e-20))  # computes ln(1 + 1e-20)

# Matrix-related tools
# there seem to be many utilities here: http://blog.sina.com.cn/s/blog_70586e000100moen.html
x = numpy.array([
    [0, 0, 0],
    [1, 0, 0],
    [1, 1, 0],
    [1, 0, 1],
]).T  # sample vectors are given as columns, the standard convention
print(numpy.cov(x, bias=1))  # use bias=1 to divide by N instead of N-1
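
# Why log1p exists (added illustration, not in the original): for tiny x the
# expression 1 + x rounds to 1.0 in double precision, so log(1 + x) loses all
# information while log1p(x) keeps it.
import numpy as np

x = 1e-20
print(np.log(1 + x))   # 0.0 -- the 1e-20 was rounded away before the log
print(np.log1p(x))     # 1e-20
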
def _mu(z):
    return log1p(np.exp(z))
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['SimHei']  # font that can render CJK labels
import numpy as np

from scipy import constants as C
print(C.c)
print(C.h)
print(C.physical_constants['electron mass'])
print(C.mile)
print(C.inch)
print(C.gram)
print(C.pound)

import scipy.special as S
print(S.log1p(1e-20))
m = np.linspace(0.1, 0.9, 4)
n = np.linspace(-10, 10, 100)
results = S.ellipj(n[:, None], m[None, :])
print([y.shape for y in results])

# scipy.spatial: spatial algorithms library
## find the nearest neighbouring points
from scipy import spatial
import pylab as pl
x = np.sort(np.random.rand(100))
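
# Hedged continuation (the original snippet stops after building x): one common way
# to finish the nearest-neighbour example is scipy.spatial.KDTree; the query points
# below are made up for illustration.
tree = spatial.KDTree(x[:, None])      # KDTree expects 2-D data: (n_samples, n_features)
queries = np.array([[0.1], [0.5], [0.9]])
dist, idx = tree.query(queries)        # distance to, and index of, the nearest sample
print(dist, x[idx])
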
def _logcdf(self, x, c, q):
    return special.log1p(-np.exp(-x**c + log(q)))
def veval(self, X):
    return log1p(np.exp(-X))
def em_update(t):
    #print('attempting em input:', t)
    if not inbounds(t):
        raise Exception('input to the em update is out of bounds (%s)' % t)
    ll_before_update = log_likelihood(t)
    p0 = t[0]
    mu = t[1:]
    # define the poisson components
    rs = [scipy.stats.poisson(m) for m in mu]
    # compute the per-count posterior distribution over the two mixture components
    pi_log_weights = np.empty((n, 2), dtype=float)
    # vectorize this later
    for i in range(n):
        if p0 > 0:
            alpha = np.log(p0)
            beta = rs[0].logpmf(deaths[i])
            loga = alpha + beta
            if np.isnan(loga):
                print('nan', mu[0], deaths[i], alpha, beta, loga)
        else:
            loga = -np.inf
        if p0 < 1:
            alpha = log1p(-p0)
            beta = rs[1].logpmf(deaths[i])
            logb = alpha + beta
            if np.isnan(logb):
                print('nan', mu[1], deaths[i], alpha, beta, logb)
        else:
            logb = -np.inf
        pi_log_weights[i, 0] = loga
        pi_log_weights[i, 1] = logb
    # convert log weights to a distribution, being careful about scaling
    pi = np.empty((n, 2), dtype=float)
    for i in range(n):
        pi[i] = log_weights_to_distn(pi_log_weights[i])
    #pi_weights[:, 0] = p0 * np.power(mu[0], deaths) * np.exp(-mu[0])
    #pi_weights[:, 1] = (1-p0) * np.power(mu[1], deaths) * np.exp(-mu[1])
    #pi = pi_weights / pi_weights.sum(axis=1)[:, None]
    # compute updated parameter values
    p_star = freqs.dot(pi[:, 0]) / freqs.sum()
    #print('em step p_star:', p_star)
    mu_star = np.zeros(2, dtype=float)
    for j in range(2):
        numer = sum(deaths[i] * freqs[i] * pi[i, j] for i in range(n))
        denom = sum(freqs[i] * pi[i, j] for i in range(n))
        if numer:
            mu_star[j] = numer / denom
        else:
            raise DegenerateMixtureError('a poisson mean is zero')
    #mu_star_numer = (deaths[:, None] * freqs[:, None] * pi).sum(axis=0)
    #mu_star_denom = (freqs[:, None] * pi).sum(axis=0)
    #try:
    #mu_star = xdivy(mu_star_numer, mu_star_denom)
    #except RuntimeWarning:
    #print(mu_star_numer)
    #print(mu_star_denom)
    #raise
    t_star = np.array([p_star, mu_star[0], mu_star[1]])
    ll_after_update = log_likelihood(t_star)
    #if ll_after_update < ll_before_update:
    #print('log likelihoods:', ll_before_update, ll_after_update)
    #raise Exception('em step reduced observed data log likelihood')
    if not inbounds(t_star):
        raise Exception('em update output is out of bounds (%s)' % t_star)
    return t_star
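
# Hedged sketch (not in the original) of the driver loop such an em_update is
# typically run in: iterate until the observed-data log likelihood stops improving;
# the starting point and tolerance below are made up.
t = np.array([0.5, 1.0, 5.0])      # initial (p, mu0, mu1)
prev_ll = log_likelihood(t)
for _ in range(200):
    t = em_update(t)
    ll = log_likelihood(t)
    if ll - prev_ll < 1e-9:        # EM never decreases the likelihood
        break
    prev_ll = ll
print(t, ll)
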
def _ppf(self, q, c):
    return np.sqrt(c * c - 2 * sc.log1p(-q)) - c  # pylint: disable=no-member
def _ppf(self, f, c):
    return (-special.log1p(-f))**(1.0 / c)