def predict_proba(self, X, clean=False):
    """Predict class probabilities for X with the most recent RGF model.

    The feature matrix is written to ``test.data.x`` inside ``self.loc_temp``,
    the executable ``self.loc_exec`` is run in ``predict`` mode against the
    last file (in reverse-sorted order) matching ``self.prefix + "*"``, and
    the scores it writes to ``predictions.txt`` are turned into
    probabilities with the logistic sigmoid.

    Parameters
    ----------
    X : array-like
        Feature matrix; written to disk with ``np.savetxt``.
    clean : bool, optional
        When true, delete every file in ``self.loc_temp`` whose path contains
        ``predictions.txt``, ``model-``, ``train.data.`` or ``test.data.``
        after the predictions have been read.

    Returns
    -------
    numpy.ndarray
        Array with one row ``[1 - p, p]`` per prediction.

    Raises
    ------
    IndexError
        If no file matches the model glob.
    """
    # All temporary files live in self.loc_temp: the model glob and the
    # clean-up below already assume that directory.
    test_path = os.path.join(self.loc_temp, "test.data.x")
    pred_path = os.path.join(self.loc_temp, "predictions.txt")

    # Store the test set into RGF format
    np.savetxt(test_path, X, delimiter=' ', fmt="%s")

    # Find latest model location
    model_glob = self.loc_temp + os.sep + self.prefix + "*"
    latest_model_loc = sorted(glob(model_glob), reverse=True)[0]

    # Format test command
    params = [
        "test_x_fn=%s" % test_path,
        "prediction_fn=%s" % pred_path,
        "model_fn=%s" % latest_model_loc,
    ]
    # Pass an argument list without a shell: with shell=True a list only
    # executes its first element on POSIX, and a list keeps paths containing
    # spaces intact. stderr is merged into stdout as "2>&1" did before.
    cmd = [self.loc_exec, "predict", ",".join(params)]
    subprocess.Popen(cmd, stdout=subprocess.PIPE,
                     stderr=subprocess.STDOUT).communicate()

    # atleast_1d: a file holding a single prediction loads as a 0-d array.
    scores = np.atleast_1d(np.loadtxt(pred_path))
    # The sigmoid (logistic CDF) maps a raw score to a probability; the
    # logistic PDF is symmetric around 0 and never exceeds 0.25.
    positive = logistic.cdf(scores)
    y_pred = np.column_stack([1 - positive, positive])

    # Clean temp directory
    if clean:
        removable = ("predictions.txt", "model-", "train.data.", "test.data.")
        for fn in glob(self.loc_temp + os.sep + "*"):
            if any(tag in fn for tag in removable):
                os.remove(fn)
    return y_pred
def plot_fit_I(self, nbins=20):
    """
    Plots a comparison of the results of the fit to the input distribution

    The sources selected by ``self.mask`` are split into ``nbins``
    equal-population bins of ``mag_auto`` (bin edges are percentiles of the
    magnitudes). For each bin the mean, standard deviation and kurtosis of
    ``log10`` of column 0 of ``sersicfit`` are computed, and four panels are
    drawn: standardized histograms against Gaussian and logistic densities,
    the binned mean against ``self._I_mu``, the binned standard deviation
    against ``self._I_std``, and the binned kurtosis.

    Parameters
    ----------
    nbins : int, optional
        Number of magnitude bins. Bin edges are the percentiles
        ``0, 100 / nbins, ..., 100``.
    """
    # Extracts variables of interest
    mag = self.cat['mag_auto'][self.mask]
    I = np.log10(self.cat['sersicfit'][self.mask, 0])

    m = np.zeros(nbins)    # bin centres
    me = np.zeros(nbins)   # per-bin mean
    st = np.zeros(nbins)   # per-bin standard deviation
    k = np.zeros(nbins)    # per-bin kurtosis

    # Percentile edges derived from nbins, so the bins always span 0-100 %
    # (the step used to be fixed at 5 %, which is only right for nbins=20).
    edges = np.percentile(mag, np.linspace(0, 100, nbins + 1))

    plt.figure(figsize=(20, 5))
    plt.subplot(141)
    for i in range(nbins):
        m_min, m_max = edges[i], edges[i + 1]
        # Strict inequalities: values equal to a bin edge are left out.
        ind = (mag > m_min) & (mag < m_max)
        m[i] = 0.5 * (m_min + m_max)
        me[i] = np.mean(I[ind])
        st[i] = np.std(I[ind])
        k[i] = kurtosis(I[ind])
        # `density` replaces the `normed` keyword removed from matplotlib.
        plt.hist((I[ind] - me[i]) / st[i], 30, range=[-5, 5], alpha=0.2,
                 density=True)
    y = np.linspace(-5, 5)
    plt.plot(y, norm.pdf(y), 'b', label='Gaussian')
    # scale = sqrt(3) / pi gives the logistic distribution unit variance.
    plt.plot(y, logistic.pdf(y, scale=np.sqrt(3) / np.pi), 'r',
             label='Logistic')
    plt.legend()
    plt.title(r'Standardized $\log_{10}(I)$ in magnitude bins')

    plt.subplot(142)
    plt.plot(m, me, '+-')
    plt.plot(m, self._I_mu(m), 'r--')
    plt.title(r'Mean $\log_{10}(I)$ ')
    plt.axvspan(self.mag_range[0], self.mag_range[1], alpha=0.2, color='k')

    plt.subplot(143)
    plt.plot(m, st, '+-')
    plt.plot(m, self._I_std(m), 'r--')
    plt.axvspan(self.mag_range[0], self.mag_range[1], alpha=0.2, color='k')
    plt.title(r'Standard deviation of $\log_{10}(I)$ ')

    plt.subplot(144)
    plt.plot(m, k, '+-')
    # 1.2 is the excess kurtosis of a logistic distribution.
    plt.axhline(1.2, color='r')
    plt.axvspan(self.mag_range[0], self.mag_range[1], alpha=0.2, color='k')
    plt.title('Kurtosis')
def __likemaker__(self, x, b):
    """Accumulate the logit log-likelihood, its gradient and its Hessian.

    Iterates over the first ``self.n`` rows of ``x`` using the responses in
    ``self.y``.

    Parameters
    ----------
    x : numpy.ndarray
        Two-dimensional array; row ``i`` holds the regressors of
        observation ``i``.
    b : numpy.ndarray
        Coefficient vector multiplied with each row of ``x``.

    Returns
    -------
    tuple
        ``(logL, dlogL, ddlogL)``: the summed log-likelihood, the summed
        score contributions and the summed (negative semi-definite)
        second-derivative contributions.
    """
    log_lik, score, hessian = 0, 0, 0
    for obs in range(self.n):
        # Observation as a column vector so outer products are matrices.
        column = x[obs, :].reshape(-1, 1)
        linear_index = column.T.dot(b)
        prob = logistic.cdf(linear_index)
        response = self.y[obs]

        log_lik += response * np.log(prob) + (1 - response) * np.log(1 - prob)
        score += (response - prob) * column
        hessian -= logistic.pdf(linear_index) * (column.dot(column.T))
    return (log_lik, score, hessian)
def radical_est_logist_val(mu, lmbd, delta, sample):
    """Evaluate the objective built from logistic densities of ``sample``.

    For ``delta == 0`` the result is the product of
    ``logistic.pdf(x, mu, lmbd)`` over the sample (``1`` for an empty
    sample). Otherwise it is::

        lmbd ** (delta ** 2 / (1 + delta)) / delta
            * sum(logistic.pdf(x, mu, lmbd) ** delta for x in sample)

    Parameters
    ----------
    mu : float
        Location passed to ``logistic.pdf``.
    lmbd : float
        Scale passed to ``logistic.pdf``.
    delta : float
        Exponent parameter; ``0`` selects the plain product of densities.
    sample : sequence of float
        Observations; must support ``len`` and integer indexing.
    """
    count = len(sample)

    if delta == 0:
        product = 1
        for pos in range(count):
            product = product * logistic.pdf(sample[pos], mu, lmbd)
        return product

    exponent = pow(delta, 2.0) / (1.0 + delta)
    prefactor = pow(lmbd, exponent) / delta

    powered_sum = 0
    for pos in range(count):
        powered_sum = powered_sum + pow(logistic.pdf(sample[pos], mu, lmbd), delta)
    return prefactor * powered_sum
def TVD(q):
    """
    Approximate the Total Variation Distance between the exact standard
    logistic distribution and an approximation of it.

    The integral of ``|p - q| / 2`` is approximated by a Riemann sum on
    10000 equally spaced points of a symmetric interval. The interval ends
    at ``+/- logit(eps / 2)`` with ``eps`` the float machine epsilon, i.e.
    the exact distribution has probability ``eps`` outside of it.

    q : callable evaluating the pdf of the approximate distribution on a
        numpy array of points
    """
    eps = np.finfo(float).eps
    half_width = -logit(eps / 2)
    grid, spacing = np.linspace(-half_width, half_width, 10000, retstep=True)

    # Riemann-sum approximation of the integral over the grid
    exact_pdf = logistic.pdf(grid)
    approx_pdf = q(grid)
    l1_distance = np.linalg.norm(exact_pdf - approx_pdf, ord=1)
    return 0.5 * l1_distance * spacing
def figure_logistic_vs_normal():
    """Draw logistic and normal pdf / cdf curves side by side.

    Both panels use 100 points on [-5, 5]. The normal curves use mean 0 and
    standard deviation 1.8138. The finished figure is wrapped with
    ``xmle.Show`` and returned.
    """
    from scipy.stats import logistic, norm

    # 1.8138 ~ pi / sqrt(3), the standard deviation of the standard logistic.
    normal_sd = 1.8138
    grid = np.linspace(-5, 5, 100)
    fig, axs = plt.subplots(1, 2, figsize=(7, 3), squeeze=True)

    panels = (
        ("pdf", "Probability Density Functions\n(same mean and variance)"),
        ("cdf", "Cumulative Density Functions\n(same mean and variance)"),
    )
    for axis, (method, title) in zip(axs, panels):
        axis.plot(grid, getattr(logistic, method)(grid), label='Logistic')
        axis.plot(grid, getattr(norm, method)(grid, 0, normal_sd), label='Normal')
        axis.set_title(title)
        axis.legend()
    return xmle.Show(fig)
def pathway_prediction(landa, a_init, mu, gamma, eta, tau, observed_weight_vector, pathway_dict,
                       record_samples=True):
    """Sample pathway activities with PyMC and report the detected pathways.

    A model with a Beta(1, 1) variable ``landa_value``, a Bernoulli vector
    ``a`` (one entry per row of ``eta``) and an observed Bernoulli vector
    ``w`` is built, then sampled for 1000 draws with a Metropolis step for
    ``landa_value`` and a BinaryGibbsMetropolis step for ``a``. The first
    100 draws are discarded when reading the trace.

    Parameters
    ----------
    landa : initial value of ``landa_value`` in the sampler start point.
    a_init : initial value of ``a``; cast to ``int32``.
    mu, gamma, eta, tau : quantities entering the probability of ``w``
        (see the expressions in the body).
    observed_weight_vector : observed data for ``w``; also fixes its shape.
    pathway_dict : mapping whose ``"pathway"`` entry lists the pathway
        identifiers (used as spreadsheet header and for reporting).
    record_samples : when true, the mean pathway activity is written to
        ``pathway_prediction_output.xlsx`` in the directory named by the
        ``PUMA_OUTPUT_DATA`` environment variable.

    Returns
    -------
    The post-burn-in samples of the second trace variable.
    """
    burn_in = 100
    activity_threshold = 0.5

    number_of_pathways = np.size(eta, 0)
    number_of_metabolites = np.size(eta, 1)  # not used further below

    model = pm.Model()
    with model:
        landa_value = pm.Beta('landa_value', alpha=1, beta=1)
        # define prior
        a = pm.Bernoulli('a', p=landa_value, shape=number_of_pathways)  # 1 x p

        # define posterior: p (w|a)
        # 1xf: number of pathways that can generate each metabolite f
        pathways_per_metabolite = pm.math.dot(a, eta)
        # 1xf: p(m_j = 1| a)
        phi = 1 - tt.exp(tt.log(1 - mu) * pathways_per_metabolite)
        # 1xk: p(w_k=1 | a)
        psi = 1 - tt.exp(tt.dot(tt.log(1 - (gamma * phi)), tau))
        pm.Bernoulli('w', p=psi, observed=observed_weight_vector,
                     shape=observed_weight_vector.shape)

        start_point = {'landa_value': landa, 'a': a_init.astype(np.int32)}
        landa_step = pm.Metropolis([landa_value])
        pathway_step = pm.BinaryGibbsMetropolis([a])
        trace = pm.sample(draws=1000, step=[landa_step, pathway_step],
                          start=start_point, random_seed=42)

    landa_value_samples_logodds = trace.get_values(trace.varnames[0], burn=burn_in)
    # NOTE(review): this value is never used; if a log-odds -> probability
    # back-transform was intended it would be logistic.cdf - confirm.
    landa_value_samples = logistic.pdf(landa_value_samples_logodds)

    pathways_samples = trace.get_values(trace.varnames[1], burn=burn_in)
    mean_pathways_activity = np.mean(pathways_samples, axis=0)

    # NOTE(review): the extent of this branch is ambiguous in the collapsed
    # source; only the spreadsheet export is treated as conditional - confirm.
    if record_samples:
        outdata_dir = os.environ['PUMA_OUTPUT_DATA']
        pathway_prediction_output = os.path.join(outdata_dir, 'pathway_prediction_output.xlsx')
        mean_pathways_activity_in_samples = np.squeeze(mean_pathways_activity).reshape(1, -1)
        write_data(mean_pathways_activity_in_samples, pathway_prediction_output,
                   sheetname="samples", header=pathway_dict["pathway"])

    print("mean_pathways_activity_PUMA_detected:", list(mean_pathways_activity))

    n_active_pathways = sum(
        1 for pathway_activity in mean_pathways_activity
        if pathway_activity >= activity_threshold)
    print("number_active_pathways [PUMA detected]:", n_active_pathways)

    pathway_ids = pathway_dict["pathway"]

    active_pathways_indices = np.nonzero(mean_pathways_activity >= activity_threshold)[0]
    active_pathways_ID = [pathway_ids[index] for index in active_pathways_indices]
    print("active_pathways_PUMA_detected:", active_pathways_ID)

    not_active_pathways_indices = np.nonzero(mean_pathways_activity < activity_threshold)[0]
    not_active_pathways_ID = [pathway_ids[index] for index in not_active_pathways_indices]
    print("not_active_pathways_PUMA_detected:", not_active_pathways_ID)

    return pathways_samples
def logistic(shape, scale):
    """
    Standard logistic noise multiplied by `scale`

    Builds the density, cdf, pdf, log-density derivatives and a sampler for
    independent zero-mean logistic noise and passes them to
    ``randomization``.

    Parameters
    ----------

    shape : tuple
        Shape of noise.

    scale : float
        Scale of noise.

    Returns
    -------
    The object returned by ``randomization(...)``.
    """
    # Local alias: this function is itself called ``logistic``, so depending
    # on where it is defined the bare name may not resolve to the scipy
    # distribution. The alias works in either case.
    from scipy.stats import logistic as _logistic_rv

    # from http://docs.scipy.org/doc/numpy/reference/generated/numpy.random.logistic.html
    def density(x):
        # Joint density of independent coordinates (np.prod replaces the
        # np.product alias removed in NumPy 2.0).
        e = np.exp(-x / scale)
        return np.prod(e / (1 + e) ** 2) / scale ** (np.prod(x.shape))

    def cdf(x):
        return _logistic_rv.cdf(x, loc=0., scale=scale)

    def pdf(x):
        return _logistic_rv.pdf(x, loc=0., scale=scale)

    def derivative_log_density(x):
        # d/dx [-x/s - 2 log(1 + e^{-x/s})] = (e^{-x/s} - 1) / (s (e^{-x/s} + 1)).
        # The denominator used to read ``scale * e + 1``, which is only
        # correct for scale == 1 and disagreed with the function below.
        e = np.exp(-x / scale)
        return (e - 1) / (scale * (e + 1))

    # negative log density is (with \mu=0)
    # x/s + log(s) + 2 \log (1 + e(-x/s))
    def grad_negative_log_density(x):
        e = np.exp(-x / scale)
        return (1 - e) / ((1 + e) * scale)

    def sampler(size):
        # `shape` and `size` are concatenated, so `size` must be a tuple.
        return np.random.logistic(loc=0, scale=scale, size=shape + size)

    # Normalising term of the log density: one -log(scale) per coordinate.
    constant = -np.prod(shape) * np.log(scale)

    def log_density(x):
        x2d = np.atleast_2d(x)
        return (-x2d.sum(1) / scale
                - 2 * np.log(1 + np.exp(-x2d / scale)).sum(1)
                + constant)

    return randomization(
        shape,
        density,
        cdf,
        pdf,
        derivative_log_density,
        grad_negative_log_density,
        sampler,
        lipschitz=.25 / scale**2,
        log_density=log_density)
bounds=(0, [counts / 2, counts, 1])) y_fit = f(x, a_, c_, r_) print r_ fig, ax1 = plt.subplots(1, 1, figsize=(6, 4)) inflection_points[thetastar] = math.log(a_) / r_ print inflection_points[thetastar] ax1.plot(x, y_fit, '--k', label='r = {}, inflection_point = {}'.format( r_, (math.log(a_) / r_))) ax1.plot(x, y, 'o') plt.legend() ax2 = ax1.twinx() ax2.plot(x, logistic.pdf(x, loc=(math.log(a_) / r_), scale=10), color='orange') plt.xlabel('Step') ax1.set_ylabel('Size of the largest connected component') ax2.set_ylabel('Probability density') plt.title( 'Logistic regression for the largest connected component over time for thetastar of {}' .format(thetastar)) plt.show() with open('inflection_points.json', 'w') as f: json.dump(inflection_points.items(), f, sort_keys=True) with open('one_agent_in_large_system.json', 'r') as fp: j = json.load(fp) j_dict = {} for i in range(0, len(j)):
# Per-row list of masked column indices; an empty list marks a skipped row.
mask = []
# Number of rows skipped because they had too few non-zero entries.
fails = 0
#----------- Mask each gene iteratively -----------#
for cell in range(truth.shape[0]):
    # Columns with a non-zero value in this row of `truth`, and those values.
    nonZeroIdx = np.nonzero(truth[cell, :])[0]
    nonZeroVals = truth[cell, nonZeroIdx]
    # Rows with fewer than 50 non-zero values are left untouched.
    if len(nonZeroVals) < 50:
        fails += 1
        print("Cannot mask values for only {} cells".format(len(nonZeroVals)))
        mask.append([])
        continue
    # Sampling weights: logistic density (parameters from `params`) evaluated
    # at the log of each non-zero value, normalised to sum to one.
    probs = logistic.pdf(np.log(nonZeroVals), *params)
    # Draw N_MASKED_PER_CELL distinct columns and zero them in `raw`.
    mask_c = np.random.choice(nonZeroIdx, N_MASKED_PER_CELL, p=probs / sum(probs), replace=False)
    raw[cell, mask_c] = 0
    mask.append(mask_c)
print("Counting masked values..")
# Tally of the `truth` values at positions where `raw` now differs.
print(Counter(truth[(raw != truth)]))
print(fails)
def density(self, x):
    """Return the logistic pdf at ``x`` with location ``self.mu`` and scale ``self.sigma``."""
    location, spread = self.mu, self.sigma
    return logistic.pdf(x, loc=location, scale=spread)
# gamma: pdf and cdf between the 1 % and 99 % quantiles for shape `a`
mean, var, skew, kurt = gamma.stats(a, moments = 'mvsk')
x = np.linspace(gamma.ppf(0.01, a), gamma.ppf(0.99, a), 100)
ax1.plot(x, gamma.pdf(x, a), 'r-', lw=5, alpha=0.6, label='gamma pdf')
ax1.set_title('gamma pdf')
ax2.plot(x, gamma.cdf(x, a), 'r-', lw=5, alpha=0.6, label='gamma cdf')
ax2.set_title('gamma cdf')

# logistic
# `b` is passed positionally, so scipy treats it as the location `loc`.
b = 0.5
mean, var, skew, kurt = logistic.stats(b, moments = 'mvsk')
x = np.linspace(logistic.ppf(0.01, b), logistic.ppf(0.99, b), 100)
# Labels now name the plotted distribution (they were copy-pasted 'gamma').
ax3.plot(x, logistic.pdf(x, b), 'g-', lw=5, alpha=0.6, label='logistic pdf')
ax3.set_title('logistic pdf')
ax4.plot(x, logistic.cdf(x, b), 'g-', lw=5, alpha=0.6, label='logistic cdf')
ax4.set_title('logistic cdf')

# exponential
# `a` is passed positionally, so scipy treats it as the location `loc`.
a = 1.99
mean, var, skew, kurt = expon.stats(a, moments = 'mvsk')
x = np.linspace(expon.ppf(0.01, a), expon.ppf(0.99, a), 100)
ax5.plot(x, expon.pdf(x, a), 'b-', lw=5, alpha=0.6, label='exponential pdf')
ax5.set_title('exponential pdf')
from scipy.stats import uniform
x = np.linspace(0,12,100)
y = uniform.pdf(x, loc=1, scale=1+9)
plt.plot(x,y)
pass

### ロジスティック分布(Logistic Distribution)

ロジスティック分布のモジュール名は`logistic`。

```
logistic.pdf(x, loc=0, scale=1)
logistic.cdf(x, loc=0, scale=1)
logistic.ppf(a, loc=0, scale=1)
logistic.rvs(loc=0, scale=1, size=1)
```

* `loc`:平均値
* `scale`:分散に影響する値

```
logistic.pdf(x,loc,scale) = logistic.pdf(z) / scale, z=(x-loc)/scale
```

`scipy.stats`の`logistic`を読み込み、確率密度関数の図を描く。

from scipy.stats import logistic
x = np.linspace(-5,5,100)
y = logistic.pdf(x)
plt.plot(x,y)
pass
# python3
import math
import random
import numpy as np
from scipy.stats import logistic
from matplotlib import pyplot as plt


def inv_logistic_cdf(u):
    """Return the standard logistic quantile ``log(u / (1 - u))``.

    ``u`` must lie strictly between 0 and 1; ``math.log`` raises
    ``ValueError`` at 0 and the division raises ``ZeroDivisionError`` at 1.
    """
    return math.log(u / (1 - u))


random.seed(1001)

# random samples from Unif(0, 1)
random_numbers = [random.random() for _ in range(12000)]

# random samples of Logistic Dist. through the inverse transform
random_numbers_from_logistic = [inv_logistic_cdf(random_number)
                                for random_number in random_numbers]

x = np.linspace(logistic.ppf(0.001), logistic.ppf(0.999), 100)
# `density=True` replaces the `normed` keyword removed from matplotlib, so
# the histogram is on the same scale as the pdf drawn over it.
plt.hist(random_numbers_from_logistic, 60, facecolor='green', density=True,
         alpha=0.6, label='random numbers')
plt.plot(x, logistic.pdf(x), lw=2, alpha=0.7, label='logistic pdf')
plt.legend(loc='best')
def neg_weight_f(value):
    """Return ``pdf / (1 - cdf)`` of the standard logistic at the trimmed value.

    ``value`` is first passed through ``trim_value`` (defined elsewhere).
    """
    trimmed = trim_value(value)
    upper_tail = 1 - logistic.cdf(trimmed)
    return logistic.pdf(trimmed) / upper_tail
def pos_weight_f(value):
    """Return ``pdf / cdf`` of the standard logistic at the trimmed value.

    ``value`` is first passed through ``trim_value`` (defined elsewhere).
    """
    trimmed = trim_value(value)
    lower_tail = logistic.cdf(trimmed)
    return logistic.pdf(trimmed) / lower_tail
def logistic_choice(self, total, sample_size, replace=False):
    """Draw indices from ``range(total)`` with logistic-shaped weights.

    Index ``i`` is weighted by the logistic pdf at ``i`` with location 0 and
    scale ``total / 5.0``, so smaller indices are more likely.

    Parameters
    ----------
    total : int
        Number of candidate indices ``0 .. total - 1``.
    sample_size : int
        Number of indices to draw.
    replace : bool, optional
        Whether an index may be drawn more than once.

    Returns
    -------
    numpy.ndarray
        The drawn indices.
    """
    candidates = np.arange(0, total)
    weights = logistic.pdf(candidates, loc=0, scale=total / 5.0)
    weights /= np.sum(weights)
    return np.random.choice(total, size=sample_size, replace=replace, p=weights)
import numpy as np
from scipy.stats import norm, logistic
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# define the Kullback-Leibler divergence function
def kl_divergence(p, q):
    """Return ``sum(p * log2(p / q))`` over the entries where ``p != 0``.

    ``p`` and ``q`` are arrays of density values on the same grid. The
    result is a plain sum in bits; it is not multiplied by the grid spacing.
    """
    # Weight by `p`, the first argument. The body used to read the global
    # `x`, which only gave the right result when called with p = x.
    return np.sum(np.where(p != 0, p * np.log2(p / q), 0))


# define the range on which the probability density functions (PDF) are evaluated
x_range = np.arange(-10, 10, 0.0001)

# define the probability density functions (PDF) of the corresponding variables
x = norm.pdf(x_range, loc=0, scale=1)
y1 = logistic.pdf(x_range, loc=0, scale=1)
y2 = norm.pdf(x_range, loc=0.5, scale=1)
y3 = norm.pdf(x_range, loc=-0.5, scale=1)

# draw all the PDFs on the same plot
plt.figure()
plt.title('PDF of all random variables')
plt.plot(x_range, x, label = "N(0,1)")
plt.plot(x_range, y1, label = "Logistic(0,1)")
plt.plot(x_range, y2, label = "N(0.5,1)")
plt.plot(x_range, y3, label = "N(-0.5,1)")
plt.legend(loc = "best")
plt.show()


# In[2]:
def plot_logistic_fit(w, tf, loc, scale, xlabel_count=4, legend_loc='upper right', out_file=None, ax=None):
    """
    Plot time series for given word and the best-fit logistic distribution.

    The logistic density is evaluated at the positions ``0 .. len(tf) - 1``
    and rescaled (multiplied by the sum of ``tf - tf.min()`` and shifted by
    ``tf.min()``) so that it is comparable with the series.

    Parameters:
    -----------
    w : str
        Word; used as the axes title.
    tf : pandas.Series
        Series to plot; its sorted index supplies the x tick labels.
    loc : float
        Location of the logistic density.
    scale : float
        Scale of the logistic density.
    xlabel_count : int
        Controls the spacing between labelled x ticks.
    legend_loc : str
        Legend location passed to ``ax.legend``.
    out_file : str
        If not None, the figure is saved to this path.
    ax : matplotlib.axes.Axes
        Axes to draw on. When None a new 5x5 figure is created and axis
        labels are added.
    """
    # Local import: the `pd.np` alias used previously was removed in pandas 2.0.
    import numpy as np

    label_font = 18
    title_font = 24
    tick_size = 14
    legend_size = 14
    N = len(tf)
    X = np.arange(N)
    xlabels = sorted(tf.index)
    xlabel_interval = int(ceil(N / (xlabel_count))) + 1
    # zip() is lazy on Python 3, so materialise the pairs before slicing.
    xticks, xlabels = zip(*list(zip(X, xlabels))[::xlabel_interval])
    xlabel = 'Date'
    ylabel = 'log(f)'
    logistic_y = logistic.pdf(X, loc=loc, scale=scale)
    # rescale logistic y to match tf:
    # y_logistic_rescaled = y_logistic * y_sum + y_offset
    y_offset = tf.min()
    tf_rescaled = tf - y_offset
    logistic_y_rescaled = logistic_y * tf_rescaled.sum() + y_offset
    series_color = 'r'
    fit_color = 'b'
    series_linestyle = '-'
    fit_linestyle = '--'
    single_axis = ax is None
    if single_axis:
        plt.figure(figsize=(5, 5))
        ax = plt.subplot(111)
    l1, = ax.plot(X, tf, color=series_color, linestyle=series_linestyle)
    l2, = ax.plot(X, logistic_y_rescaled, color=fit_color, linestyle=fit_linestyle)
    # add legend
    lines = [l1, l2]
    labels = ['observed', 'logistic fit']
    ax.legend(lines, labels, fontsize=legend_size, loc=legend_loc)
    # set ticks
    ax.set_xticks(xticks)
    ax.set_xticklabels(xlabels, fontsize=tick_size)
    yticks = ax.get_yticks()
    # A real list: on Python 3 map() would hand matplotlib a lazy iterator.
    ylabels = ['%.2f' % (t) for t in yticks]
    ax.set_yticks(yticks)
    ax.set_yticklabels(ylabels, fontsize=tick_size)
    ax.set_title(w, fontsize=title_font)
    # if single axis, add x and y labels
    if single_axis:
        ax.set_xlabel(xlabel, fontsize=label_font)
        ax.set_ylabel(ylabel, fontsize=label_font)
    if out_file is not None:
        plt.tight_layout()
        plt.savefig(out_file)
# numpy is used below as `np`; it is imported here so the example runs on
# its own.
import numpy as np
from scipy.stats import logistic
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:
mean, var, skew, kurt = logistic.stats(moments='mvsk')

# Display the probability density function (``pdf``):
x = np.linspace(logistic.ppf(0.01), logistic.ppf(0.99), 100)
ax.plot(x, logistic.pdf(x), 'r-', lw=5, alpha=0.6, label='logistic pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:
rv = logistic()
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:
vals = logistic.ppf([0.001, 0.5, 0.999])
np.allclose([0.001, 0.5, 0.999], logistic.cdf(vals))
# True

# Generate random numbers:
r = logistic.rvs(size=1000)
def make_logistic_plot(self):
    """Plot the standard logistic pdf of the first 100 entries of ``self.probs``.

    The values are drawn against the positions 1..100 as a red line labelled
    "logistic"; the resulting line object is stored in
    ``self.line_logistic``.
    """
    positions = np.linspace(1, 100, 100)
    density_values = logistic.pdf(self.probs[:100])
    drawn = plt.plot(positions, density_values, linewidth=2,
                     label="logistic", color="r")
    # plt.plot returns a list holding the single line that was drawn.
    (self.line_logistic,) = drawn