def thompsonBernoulli(a, b, banditArms):
    numArms = len(banditArms)
    successCounters = np.zeros(numArms)
    failCounters = np.zeros(numArms)
    thetas = np.zeros(numArms)
    for t in range(5000):
        # draw arms according to beta distribution
        for i in range(numArms):
            thetas[i] = beta(successCounters[i] + a, failCounters[i] + b).rvs()
        # get the arm with max theta
        maxArmInd = np.argmax(thetas)
        # draw the maxArmInd and observe the reward
        reward = banditArms[maxArmInd].rvs()
        if reward == 1:
            successCounters[maxArmInd] += 1
        else:
            failCounters[maxArmInd] += 1
    betaDists = [beta(successCounters[i] + a, failCounters[i] + b) for i in range(numArms)]
    return betaDists
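# Usage sketch for thompsonBernoulli above. The Bernoulli arms and their probabilities are
# illustrative assumptions, not part of the original snippet; numpy and scipy.stats are
# assumed to be available as in the function body.
import numpy as np
from scipy.stats import beta, bernoulli

if __name__ == '__main__':
    np.random.seed(0)
    arms = [bernoulli(p) for p in (0.3, 0.5, 0.7)]             # hypothetical Bernoulli arms
    posteriors = thompsonBernoulli(a=1, b=1, banditArms=arms)  # uniform Beta(1, 1) prior
    for i, post in enumerate(posteriors):
        # posterior mean of each arm's success probability after 5000 pulls
        print('arm %d: posterior mean %.3f' % (i, post.mean()))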
def calc():
    a, b = 100, 1
    e1 = beta(a, b).entropy()
    a, b = 100, 2
    e2 = beta(a, b).entropy()
    # if e2 > e1, the entropy increased despite the additional observation
    print(e2 - e1, e2 > e1)
def test_product_basis(self):
    import time
    comp = (stats.beta(0.5, 0.5), stats.beta(0.5, 0.5))
    rv = best.random.RandomVectorIndependent(comp)
    print(str(rv))
    prod = best.gpc.ProductBasis(degree=10, rv=rv)
    print(str(prod))
    x = rv.rvs(size=10)
    print(prod(x).shape)
    x1 = np.linspace(1e-4, 0.99, 64)
    x2 = np.linspace(1e-4, 0.99, 64)
    X1, X2 = np.meshgrid(x1, x2)
    xx = np.vstack([X1.flatten(), X2.flatten()]).T
    z = rv.pdf(xx)
    Z = z.reshape((64, 64))
    #plt.contourf(X1, X2, np.log(Z).T)
    #plt.show()
    start_time = time.time()
    phi = prod(xx)
    end_time = time.time()
    print("Elapsed time was %g seconds" % (end_time - start_time))
    print(phi.shape)
    for j in range(phi.shape[1]):
        plt.contourf(X1, X2, phi[:, j].reshape((64, 64)))
        plt.show()
def make_first_set_of_plots():
    N = 1000
    x = zeros(shape=(N,), dtype=float)
    t = None
    tmax = 10
    axis([0, tmax, 0, 1])
    for i in range(N):
        t, y = random_walk(0.25, tmax, 0.01, t)
        x[i] = y[-1]
        if (i < 3):
            plot(t, (y + 1) / 2.0)
    xlabel("time")
    ylabel("CTR")
    savefig("random_walk.png")
    clf()
    subplot(211)
    hist((x + 1) / 2, bins=50)
    ylabel("Monte carlo results")
    subplot(212)
    best_fit = beta.fit((x + 1) / 2, floc=0, fscale=1)
    print(best_fit)
    ctr = arange(0, 1, 0.001)
    plot(ctr, beta(1, 4).pdf(ctr), label="Invariant distribution, beta(1,4)")
    plot(ctr, beta(best_fit[0], best_fit[1]).pdf(ctr),
         label="Best fit, beta(" + str(best_fit[0]) + "," + str(best_fit[1]) + ")")
    xlabel("CTR at t=" + str(tmax))
    ylabel("pdf")
    legend()
    savefig("long_term_random_walk_result.png")
def sample_hyperparameters(state):
    # http://bit.ly/1baZ3zf
    T = state['T']
    num_samples = 10  # R
    aalpha = 5
    balpha = 0.1
    abeta = 0.1
    bbeta = 0.1
    bgamma = 0.1  # ?
    agamma = 5  # ?
    # for (int r = 0; r < R; r++) {
    for r in range(num_samples):
        # gamma: root level (Escobar+West95) with n = T
        eta = beta(state['gamma'] + 1, T).rvs()
        bloge = bgamma - np.log(eta)
        K = state['num_topics']
        pie = 1. / (1. + (T * bloge / (agamma + K - 1)))
        u = bernoulli(pie).rvs()
        state['gamma'] = gamma(agamma + K - 1 + u, 1. / bloge).rvs()
        # alpha: document level (Teh+06)
        qs = 0.
        qw = 0.
        for m, doc in enumerate(state['docs']):
            qs += bernoulli(len(doc) * 1. / (len(doc) + state['alpha'])).rvs()
            qw += np.log(beta(state['alpha'] + 1, len(doc)).rvs())
        state['alpha'] = gamma(aalpha + T - qs, 1. / (balpha - qw)).rvs()
    state = update_beta(state, abeta, bbeta)
    return state
def main():
    dist = stats.beta(10, 5)
    target = stats.beta(10, 20)
    pvar = 0.01
    steps = 100000
    bins = 10
    savefile = 'states.csv'
    plotfile = 'dists.png'
    prev = np.random.random()
    states = []
    counts = np.zeros(bins)
    counts[rounded(prev, bins)] += 1
    for i in range(steps):
        cur = stats.norm(prev, pvar).rvs()
        counts[rounded(cur, bins)] += 1
        counts /= np.sum(counts)
        curlik = dist.pdf(cur) * target.pdf(cur)
        a = dist.pdf(cur) / dist.pdf(prev)
        if a > np.random.random():
            prev = cur
        states.append(prev)
    np.savetxt(savefile, states)
    xr = np.linspace(0, 1, 1000)
    plt.plot(xr, dist.pdf(xr))
    plt.hist(states, alpha=.5, density=True)
    plt.savefig(plotfile)
def f3TruncNormRVSnp(parameters):
    N = parameters['N']
    target = parameters['target']
    rv1, rv2, rv3 = (ndarray(shape=(N,), dtype=float),
                     ndarray(shape=(N,), dtype=float),
                     ndarray(shape=(N,), dtype=float))
    # if parameters['ncpu']:
    #     ncpu = parameters['ncpu']
    # else:
    #     ncpu = mp.cpu_count()
    #
    # pool = mp.Pool(ncpu)
    # workers = []
    if not parameters['distribution']:
        print('No distribution set...abort')
        exit(1)
    elif parameters['distribution'] == 'truncnorm':
        a1, b1 = ((parameters['min_intrv1'] - parameters['mu1']) / parameters['sigma1'],
                  (parameters['max_intrv1'] - parameters['mu1']) / parameters['sigma1'])
        a2, b2 = ((parameters['min_intrv2'] - parameters['mu2']) / parameters['sigma2'],
                  (parameters['max_intrv2'] - parameters['mu2']) / parameters['sigma2'])
        a3, b3 = ((parameters['min_intrv3'] - parameters['mu3']) / parameters['sigma3'],
                  (parameters['max_intrv3'] - parameters['mu3']) / parameters['sigma3'])
        rv1 = truncnorm(a1, b1, loc=parameters['mu1'], scale=parameters['sigma1']).rvs(N)
        rv2 = truncnorm(a2, b2, loc=parameters['mu2'], scale=parameters['sigma2']).rvs(N)
        rv3 = truncnorm(a3, b3, loc=parameters['mu3'], scale=parameters['sigma3']).rvs(N)
    elif parameters['distribution'] == 'norm':
        rv1 = norm(loc=parameters['mu1'], scale=parameters['sigma1']).rvs(N)
        rv2 = norm(loc=parameters['mu2'], scale=parameters['sigma2']).rvs(N)
        rv3 = norm(loc=parameters['mu3'], scale=parameters['sigma3']).rvs(N)
    elif parameters['distribution'] == 'uniform':
        rv1 = uniform(loc=parameters['mu1'], scale=parameters['sigma1']).rvs(N)
        rv2 = uniform(loc=parameters['mu2'], scale=parameters['sigma2']).rvs(N)
        rv3 = uniform(loc=parameters['mu3'], scale=parameters['sigma3']).rvs(N)
    elif parameters['distribution'] == 'beta':
        rv1 = beta(a=parameters['min_intrv1'], b=parameters['max_intrv1'],
                   loc=parameters['mu1'], scale=parameters['sigma1']).rvs(N)
        rv2 = beta(a=parameters['min_intrv2'], b=parameters['max_intrv2'],
                   loc=parameters['mu2'], scale=parameters['sigma2']).rvs(N)
        rv3 = beta(a=parameters['min_intrv3'], b=parameters['max_intrv3'],
                   loc=parameters['mu3'], scale=parameters['sigma3']).rvs(N)
    elif parameters['distribution'] == 'triang':
        rv1 = triang(loc=parameters['min_intrv1'], scale=parameters['max_intrv1'], c=parameters['mu1']).rvs(N)
        rv2 = triang(loc=parameters['min_intrv2'], scale=parameters['max_intrv2'], c=parameters['mu2']).rvs(N)
        rv3 = triang(loc=parameters['min_intrv3'], scale=parameters['max_intrv3'], c=parameters['mu3']).rvs(N)
    else:
        print('Distribution not recognized...abort')
        exit(1)
    if parameters['scaling']:
        # scale the values of Qs in the allowed range such that sum(Q_i) = A
        r = ABS(parameters['Q1']) + ABS(parameters['Q2']) + ABS(parameters['Q3'])
        if r == 0.0:
            r = 1.
        # rounding the values, the sum could exceed A
        Q1 = ABS(parameters['Q1']) * parameters['A'] / r
        Q2 = ABS(parameters['Q2']) * parameters['A'] / r
        Q3 = parameters['A'] - Q1 - Q2
    else:
        # print("scaling = False")
        Q1 = parameters['Q1']
        Q2 = parameters['Q2']
        Q3 = parameters['Q3']
    return _f3(rv1, rv2, rv3, Q1, Q2, Q3, target)
def greedy_allocation3(parameters):
    """
    Greedy heuristic for 3 suppliers (the same as heu_allocation3 but with different parameters).
    Does not write to the file but returns the solution.
    :param df: dataframe containing the data from the excel file
    :param parameters: parameters dict
    :return: writes to the df and saves it to the file
    """
    if not parameters['distribution']:
        print('No distribution set...abort')
        exit(1)
    elif parameters['distribution'] == 'truncnorm':
        rv1 = truncnorm_custom(parameters['min_intrv1'], parameters['max_intrv1'], parameters['mu1'], parameters['sigma1'])
        rv2 = truncnorm_custom(parameters['min_intrv2'], parameters['max_intrv2'], parameters['mu2'], parameters['sigma2'])
        rv3 = truncnorm_custom(parameters['min_intrv3'], parameters['max_intrv3'], parameters['mu3'], parameters['sigma3'])
    elif parameters['distribution'] == 'norm':
        rv1 = norm(parameters['mu1'], parameters['sigma1'])
        rv2 = norm(parameters['mu2'], parameters['sigma2'])
        rv3 = norm(parameters['mu3'], parameters['sigma3'])
    elif parameters['distribution'] == 'uniform':
        rv1 = uniform(loc=parameters['mu1'], scale=parameters['sigma1'])
        rv2 = uniform(loc=parameters['mu2'], scale=parameters['sigma2'])
        rv3 = uniform(loc=parameters['mu3'], scale=parameters['sigma3'])
    elif parameters['distribution'] == 'beta':
        rv1 = beta(a=parameters['min_intrv1'], b=parameters['max_intrv1'], loc=parameters['mu1'], scale=parameters['sigma1'])
        rv2 = beta(a=parameters['min_intrv2'], b=parameters['max_intrv2'], loc=parameters['mu2'], scale=parameters['sigma2'])
        rv3 = beta(a=parameters['min_intrv3'], b=parameters['max_intrv3'], loc=parameters['mu3'], scale=parameters['sigma3'])
    elif parameters['distribution'] == 'triang':
        rv1 = triang(loc=parameters['min_intrv1'], scale=parameters['max_intrv1'], c=parameters['mu1'])
        rv2 = triang(loc=parameters['min_intrv2'], scale=parameters['max_intrv2'], c=parameters['mu2'])
        rv3 = triang(loc=parameters['min_intrv3'], scale=parameters['max_intrv3'], c=parameters['mu3'])
    else:
        print('Distribution not recognized...abort')
        exit(1)
    A = parameters['A']
    Q = {i: 0 for i in range(3)}
    while A > 0:
        best_probability = -1
        best_retailer = -1
        for n, r in enumerate([rv1, rv2, rv3]):
            p = 1 - r.cdf(Q[n] + 1)
            if p > best_probability:
                best_probability = p
                best_retailer = n
        Q[best_retailer] += 1
        A -= 1
    parameters['Q1'] = Q[0]
    parameters['Q2'] = Q[1]
    parameters['Q3'] = Q[2]
    return {'Q1': Q[0], 'Q2': Q[1], 'Q3': Q[2], 'PROB': f3TruncNormRVSnp(parameters)}
def sample_sticks(self):
    for node in self.tssb.dfs():
        node.nu = stats.beta(node.point_count + 1, node.path_count + node.alpha).rvs()
        children = sorted(list(node.children.keys()))[::-1]
        count = 0
        for i in children:
            child = node.children[i]
            node.psi[i] = stats.beta(child.path_count + 1, count + node.gamma).rvs()
            count += child.path_count
def estimate(self, time):
    if len(self.estimates) == 0:
        return stats.beta(1, 1)
    else:
        latest, alpha, beta = self.estimates[-1]
        trust = self.trust(time - latest)
        alpha = 1 + trust * (alpha - 1)
        beta = 1 + trust * (beta - 1)
        return stats.beta(alpha, beta)
def generate_samples():
    rv1 = beta(1.0 / 3, 1.0)
    rv2 = beta(0.5, 0.5)
    sample1 = rv1.rvs(size=N)
    np.save('rv1_sample', sample1)
    np.save('rv1_pdf', rv1.pdf(sample1))
    sample2 = rv2.rvs(size=N)
    np.save('rv2_sample', sample2)
    np.save('rv2_pdf', rv2.pdf(sample2))
def test_jacobi_consistency():
    import scipy.stats as stats
    dist = stats.beta(2, 3, loc=-1, scale=2)
    p = JacobiPolynomials(alpha=2, beta=1, a=-1, b=1, normalised=False)
    assert_false(p.normalised)
    _check_poly_consistency(p, dist)
    dist = stats.beta(2, 1.5, loc=-2, scale=5)
    p = JacobiPolynomials(alpha=0.5, beta=1, a=-2, b=3)
    assert_true(p.normalised)
    _check_poly_consistency(p, dist)
def main():
    results = []
    plot(results, save=True)
    for n in range(500):
        result = np.random.binomial(1, p=0.25)
        results.append(result)
        a, b = sum(results), len(results) - sum(results)
        mean, std, entropy = (beta(a + 1, b + 1).mean(),
                              beta(a + 1, b + 1).std(),
                              beta(a + 1, b + 1).entropy())
        # print "face:" + str(a) + "," + "tail:" + str(b)
        # print "mean:%1.3f,std:%1.3f,entropy:%1.3f" % (beta(a + 1, b + 1).mean(), beta(a + 1, b + 1).std(), beta(a + 1, b + 1).entropy())
        if len(results) % 10 == 0:
            plot(results, save=True)
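# Companion check for the Beta posterior used above: with a Beta(1, 1) prior, the 95%
# credible interval for the coin's bias narrows as flips accumulate (the counts below are
# illustrative, not from the simulation above).
from scipy.stats import beta

for a, b in [(2, 8), (25, 75), (250, 750)]:
    print(a + b, beta(a + 1, b + 1).interval(0.95))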
def dirichlet_sample_approximation(base_measure, alpha, tol=0.01):
    betas = []
    pis = []
    betas.append(beta(1, alpha).rvs())  # stick-breaking weight drawn from Beta(1, alpha)
    pis.append(betas[0])
    while sum(pis) < (1. - tol):  # as more sticks are broken, sum(pis) approaches 1
        s = np.sum([np.log(1 - b) for b in betas])
        new_beta = beta(1, alpha).rvs()
        betas.append(new_beta)
        pis.append(new_beta * np.exp(s))
    pis = np.array(pis)
    thetas = np.array([base_measure() for _ in pis])
    return pis, thetas
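# Minimal usage sketch for dirichlet_sample_approximation above. The standard-normal base
# measure is an assumption for illustration; any zero-argument sampler works.
import numpy as np
from scipy.stats import beta, norm

if __name__ == '__main__':
    np.random.seed(1)
    base_measure = lambda: norm(0, 1).rvs()
    pis, thetas = dirichlet_sample_approximation(base_measure, alpha=5.0)
    # the stick-breaking weights cover (almost) all of the unit mass
    print(len(pis), pis.sum())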
def modality_models():
    parameter = 20.
    rv_included = stats.beta(parameter, 1)
    rv_excluded = stats.beta(1, parameter)
    rv_middle = stats.beta(parameter, parameter)
    rv_uniform = stats.uniform(0, 1)
    rv_bimodal = stats.beta(1. / parameter, 1. / parameter)
    models = {'included': rv_included,
              'excluded': rv_excluded,
              'middle': rv_middle,
              'uniform': rv_uniform,
              'bimodal': rv_bimodal}
    return models
def modality_models():
    parameter = 20.
    rv_psi1 = stats.beta(parameter, 1)
    rv_psi0 = stats.beta(1, parameter)
    rv_middle = stats.beta(parameter, parameter)
    rv_ambiguous = stats.uniform(0, 1)
    rv_bimodal = stats.beta(1. / parameter, 1. / parameter)
    models = {'Psi~1': rv_psi1,
              'Psi~0': rv_psi0,
              'middle': rv_middle,
              'ambiguous': rv_ambiguous,
              'bimodal': rv_bimodal}
    return models
def plot_betas():
    xs = linspace(0, 1, 30)
    plt.plot(xs, [beta(4, 2).pdf(x) for x in xs], 'bs',
             xs, [beta(2, 2).pdf(x) for x in xs], 'g^')
    font = {'family': 'serif',
            'color': 'darkred',
            'weight': 'normal',
            'size': 18,
            }
    plt.title('The Beta Distribution', fontdict=font)
    plt.text(0.2, 1.5, r'$\alpha=\beta=2$', fontdict=font)
    plt.text(0.45, 2, r'$\alpha=4$, $\beta=2$', fontdict=font)
    plt.xlabel('causal-strength', fontdict=font)
    plt.ylabel('Density', fontdict=font)
    plt.show()
def hpd_beta(y, n, h=.1, a=1, b=1, plot=False, **plot_kwds):
    apost = y + a
    bpost = n - y + b
    if apost > 1 and bpost > 1:
        mode = (apost - 1) / (apost + bpost - 2)
    else:
        raise Exception("mode at 0 or 1: HPD not implemented yet")
    post = stats.beta(apost, bpost)
    dmode = post.pdf(mode)
    lt = opt.bisect(lambda x: post.pdf(x) / dmode - h, 0, mode)
    ut = opt.bisect(lambda x: post.pdf(x) / dmode - h, mode, 1)
    coverage = post.cdf(ut) - post.cdf(lt)
    if plot:
        plt.figure()
        plotf(post.pdf)
        plt.axhline(h * dmode)
        plt.plot([ut, ut], [0, post.pdf(ut)])
        plt.plot([lt, lt], [0, post.pdf(lt)])
        plt.title(r'$p(%s < \theta < %s | y)$' % tuple(np.around([lt, ut], 2)))
    return lt, ut, coverage, h
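# Example call for hpd_beta above (illustrative numbers: 7 successes out of 20 trials with a
# flat Beta(1, 1) prior; h is the relative density cut used by the bisection search, so the
# returned coverage depends on h rather than being fixed in advance).
lt, ut, coverage, h = hpd_beta(y=7, n=20, h=0.1, a=1, b=1)
print('HPD-style interval: (%.3f, %.3f), coverage %.3f' % (lt, ut, coverage))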
def main():
    dist = stats.beta(10, 5)
    steps = 100000
    size = 100
    alpha = 0.05
    changes = int(alpha * size)
    savefile = 'states.csv'
    plotfile = 'dists.png'
    # distrvs = dist.rvs(size)
    current = np.random.random(size)
    cur_ks = ks(current, dist.cdf)[1]
    states = np.zeros((steps, size))
    for i in range(steps):
        prop = np.copy(current)
        prop[np.random.choice(range(size), changes)] = np.random.random(changes)
        prop_ks = ks(prop, dist.cdf)[1]
        diff = prop_ks - cur_ks
        if diff > 0:
            current = prop
            cur_ks = prop_ks
        states[i] = current
        print(cur_ks)
    np.savetxt(savefile, states)
def __init__(self, alpha, beta):
    self.alpha = alpha
    self.beta = beta
    # set dist before calling super's __init__
    self.dist = st.beta(alpha, beta)
    super(Beta, self).__init__()
def pickB(c, d, var):
    a = d * d / 8. / var - 0.5
    beta = dists.beta(a, a, loc=c, scale=d)
    yB = []
    for x in xs:
        yB.append(beta.pdf(x))
    plt.plot(xs, yB, label='c=' + str(c))
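# Quick sanity check of the symmetric-beta parameterisation used in pickB above: with
# a = d^2/(8*var) - 0.5, Beta(a, a) on [c, c+d] reproduces the requested variance
# (the c, d, var values below are illustrative only).
from scipy.stats import beta as beta_dist

c, d, var = 0.0, 2.0, 0.05
a = d * d / 8. / var - 0.5
rv = beta_dist(a, a, loc=c, scale=d)
print(rv.var(), var)            # both should be ~0.05
print(rv.mean(), c + d / 2.)    # symmetric around the midpoint of [c, c+d]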
def test_init(self, alphas, betas):
    from flotilla.compute.splicing import ModalityModel

    model = ModalityModel(alphas, betas)

    true_alphas = alphas
    true_betas = betas
    if not isinstance(alphas, Iterable) and not isinstance(betas, Iterable):
        true_alphas = [alphas]
        true_betas = [betas]
    true_alphas = np.array(true_alphas) if isinstance(true_alphas, Iterable) \
        else np.ones(len(true_betas)) * true_alphas
    true_betas = np.array(true_betas) if isinstance(true_betas, Iterable) \
        else np.ones(len(true_alphas)) * true_betas

    true_rvs = [stats.beta(a, b) for a, b in zip(true_alphas, true_betas)]
    true_scores = np.ones(true_alphas.shape).astype(float)
    true_scores = true_scores / true_scores.max()
    true_prob_parameters = true_scores / true_scores.sum()

    npt.assert_array_equal(model.alphas, true_alphas)
    npt.assert_array_equal(model.betas, true_betas)
    npt.assert_array_equal(model.scores, true_scores)
    npt.assert_array_equal(model.prob_parameters, true_prob_parameters)
    for test_rv, true_rv in zip(model.rvs, true_rvs):
        npt.assert_array_equal(test_rv.args, true_rv.args)
def plot_beta_dist(ctr, trials, success, alphas, betas, turns):
    """
    Pass in the ctr, trials and success, alphas, betas returned by the
    `experiment` function and the number of turns, and plot the beta
    distribution for all the arms in each of those turns.
    """
    subplot_num = len(turns) // 2
    x = np.linspace(0.001, .999, 200)
    fig = plt.figure(figsize=(14, 7))

    for idx, turn in enumerate(turns):
        plt.subplot(subplot_num, 2, idx + 1)
        for i in range(len(ctr)):
            y = beta(alphas[i] + success[turn, i],
                     betas[i] + trials[turn, i] - success[turn, i]).pdf(x)
            line = plt.plot(x, y, lw=2, label="arm {}".format(i + 1))
            color = line[0].get_color()
            plt.fill_between(x, 0, y, alpha=0.2, color=color)
            plt.axvline(x=ctr[i], color=color, linestyle="--", lw=2)
        plt.title("Posteriors After {} turns".format(turn))
        plt.legend(loc="upper right")

    return fig
def setNewEvidence(self, pos, tot):
    a = np.sum(pos)
    b = np.sum(tot) - a
    a_new = self.a + a
    b_new = self.b + b

    # get new PDF
    self.rescale((a_new + b_new) * 1.0)  # some multiplicative factor
    y_new = np.zeros(shape=(len(self.y),), dtype=float)

    ## use normal approximation for large a and b; unfortunately we reach large a and b very quickly
    if a_new + b_new > 1000:
        y_new = self.normalApprox(a_new, b_new)
    else:
        self.rv = beta(a_new, b_new)
        y_new = self.rv.pdf(self.x)

    ## just in case something messes up
    if any(np.isnan(y_new)):
        y_new = self.normalApprox(a_new, b_new)

    # measure dKL and dJS before update
    self.measureDKL(y_new)
    self.measureDJS(y_new)

    # update
    self.a = a_new
    self.b = b_new
    self.y = y_new
def check_initializer_statistics(self, xp, n):
    from scipy import stats

    ws = xp.empty((n,) + self.shape, dtype=self.dtype)
    for i in range(n):
        initializer = self.target(**self.target_kwargs)
        initializer(xp.squeeze(ws[i:i+1], axis=0))

    expected_scale = self.scale or 1.1
    sampless = cuda.to_cpu(ws.reshape(n, -1).T)
    alpha = 0.01 / len(sampless)

    ab = 0.5 * (self.dim_in - 1)

    for samples in sampless:
        if self.dim_in == 1:
            numpy.testing.assert_allclose(abs(samples), expected_scale)
            _, p = stats.chisquare((numpy.sign(samples) + 1) // 2)
        else:
            _, p = stats.kstest(
                samples,
                stats.beta(
                    ab, ab, loc=-expected_scale, scale=2*expected_scale
                ).cdf
            )
        assert p >= alpha
def test_random_vector(self):
    comp = (stats.expon(), stats.beta(0.4, 0.8), stats.norm())
    rv = best.random.RandomVectorIndependent(comp)
    print(str(rv))
    x = rv.rvs()
    print('One sample: ', x)
    print('pdf:', rv.pdf(x))
    x = rv.rvs(size=10)
    print('10 samples: ', x)
    print('pdf: ', rv.pdf(x))
    print(rv.mean())
    print(rv.var())
    print(rv.std())
    print(rv.stats())
    # Split it in two:
    rv1, rv2 = rv.split(0)
    print(str(rv1))
    x = rv1.rvs(size=5)
    print(x)
    print(rv1.pdf(x))
    print(rv2.pdf(x))
    print(str(rv2))
    print(x)
    x = rv2.rvs(size=5)
    print(rv2.pdf(x))
    rv3, rv4 = rv1.split(0)
    print(str(rv3))
    print(str(rv4))
    rv5, rv6 = rv3.split(1)
    print(str(rv5))
    print(str(rv6))
    rv7, rv8 = rv5.split(2)
    print(str(rv7))
    print(str(rv8))
def _prior_scipy(self):
    """Return the scipy prior.

    For Binomial inference this is the same as the marginal because there
    is a single model parameter."""
    a = self._prior_hyperparameters['alpha']
    b = self._prior_hyperparameters['beta']
    return beta(a, b)
def __getBetaDistribution(self, c):
    # left border
    a = c - self._e3 / 2.
    # width of beta distribution
    b = self._e3
    return beta(self._p, self._q, a, b)
def test_slice_theta_irm():
    N = 10
    defn = model_definition([N], [((0, 0), bbnc)])
    data = np.random.random(size=(N, N)) < 0.8
    view = numpy_dataview(data)
    r = rng()
    prior = {'alpha': 1.0, 'beta': 9.0}

    s = initialize(
        defn, [view], r=r,
        cluster_hps=[{'alpha': 2.0}],
        relation_hps=[prior],
        domain_assignments=[[0] * N])

    bs = bind(s, 0, [view])

    params = {0: {'p': 0.05}}

    heads = len([1 for y in data.flatten() if y])
    tails = len([1 for y in data.flatten() if not y])

    alpha1 = prior['alpha'] + heads
    beta1 = prior['beta'] + tails

    def sample_fn():
        theta(bs, r, tparams=params)
        return s.get_suffstats(0, [0, 0])['p']

    rv = beta(alpha1, beta1)
    assert_1d_cont_dist_approx_sps(sample_fn, rv, nsamples=50000)
def beta_cdf(x, mu, sig, a, b):
    s = (mu - a) / (b - a)
    e = (b - a) / sig
    q = s * s * e * e
    alpha = q * (1 - s) - s
    beta_ = q * (s - 2) + s * (1 + e * e) - 1
    return beta(alpha, beta_, loc=a, scale=(b - a)).cdf(x)
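# Moment-matching check for beta_cdf above: the (alpha, beta_) pair is chosen so that the
# scaled beta on [a, b] has mean mu and standard deviation sig (values below are illustrative).
from scipy.stats import beta as beta_rv

mu, sig, a, b = 0.3, 0.1, 0.0, 1.0
s = (mu - a) / (b - a)
e = (b - a) / sig
q = s * s * e * e
alpha = q * (1 - s) - s
beta_ = q * (s - 2) + s * (1 + e * e) - 1
rv = beta_rv(alpha, beta_, loc=a, scale=(b - a))
print(rv.mean(), rv.std())  # should be close to (0.3, 0.1)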
def expectation(self):
    return scs.beta(self.successes, self.failures).rvs(1)
""" beta分布 """ import matplotlib.pyplot as plt import numpy as np from scipy import stats # 这里的值放的是alpha和beta params = [0.5, 1, 2, 3] x = np.linspace(0, 1, 100) f, ax = plt.subplots(len(params), len(params), sharex=True, sharey=True) for i in range(4): for j in range(4): a = params[i] b = params[j] # pdf概率分布相关 y = stats.beta(a, b).pdf(x) ax[i, j].plot(x, y) ax[i, j].plot(0, 0, label="$\\alpha$={:3.2f}\n$\\beta$={:3.2f}".format(a, b), alpha=0) ax[i, j].legend(fontsize=8) ax[3, 0].set_xlabel('$\\theta$', fontsize=16) ax[0, 0].set_ylabel('$p(\\theta)$', fontsize=16) # 保存为图片 # plt.savefig('bata.png', dpi=300, figsize=(5.5, 5.5)) plt.show()
mean_class_2 = X_class_2.mean()
Prior_prob = [1 / 3, 1 / 3, 1 / 3]

y_pred = classify_using_bayes(X_test)
test_accuracy = np.mean(y_pred == y_test)
print(test_accuracy * 100)
# The accuracy of the ML estimate stays at 100% as long as the test set is no larger than
# the training set (a 50% split); once the test size exceeds the training size, the accuracy
# starts to fall below 100%.

# Probability Distributions
a = 0.1
b = 0.1
X = np.linspace(0 + 1e-5, 1 - 1e-5, 10000)  # the beta pdf is only defined for x in [0, 1]
rv = beta(a, b)
plt.plot(X, rv.pdf(X))
print("Mean is ", np.mean(rv.pdf(X)))
print("Variance is ", np.var(rv.pdf(X)))

a = 1
b = 1
X = np.linspace(0, 1, 10000)
rv = beta(a, b)
plt.plot(X, rv.pdf(X))
print("Mean is ", np.mean(rv.pdf(X)))
print("Variance is ", np.var(rv.pdf(X)))

a = 2
b = 3
X = np.linspace(0, 1, 10000)
def test_logpdf_ticket_1866(self):
    alpha, beta = 267, 1472
    x = np.array([0.2, 0.5, 0.6])
    b = stats.beta(alpha, beta)
    assert_allclose(b.logpdf(x).sum(), -1201.699061824062)
    assert_allclose(b.pdf(x), np.exp(b.logpdf(x)))
def model_selection_cv(pca_trn, pca_tst, y_train, y_test): #warnings.filterwarnings("ignore") # Load workspace variable from saved file #groupby_mean = pd.read_csv('ml-latest/base.csv') # -------------------------matrix of the numerical features-------------------------# ##import matplotlib.pyplot as plt ##cax = plt.matshow(np.cov(D_Arr[:,:-1].T)) ##cax = plt.matshow(np.cov(D_Arr[:,0:15].T)) ##plt.clim(-1,1) ##plt.colorbar(cax) ##plt.title('Covariance matrix of numerical features') ##plt.show() #rating = groupby_mean.rating #groupby_mean.drop(['rating'], axis=1, inplace=True) # Split the dataset in the ratio train:test = 0.9:0.1 #X_train, X_test, y_train, y_test = model_selection.train_test_split(groupby_mean, rating, test_size=0.1, # random_state=0) X_train, X_test = pca_trn, pca_tst # # Create OLS linear regression object # regrOLS = linear_model.LinearRegression() # # # Perform 5 fold cross-validation and store the MSE resulted from each fold # scores = model_selection.cross_val_score(regrOLS, X_train, y_train, scoring='r2', cv=5) # # # Note: Due to a known issue in scikit-learn the results return are flipped in sign # print('OLS: Least CV error: %.2f\n' % np.min(-scores)) # # # ---------------- Cross validation for Ridge and Lasso ------------------------# # # Range of hyper-parameters to choose for CV # lambdas = [0.0001, 0.001, 0.01, 0.02, 0.05, 0.1, 1, 10] # for l in lambdas: # print('Lambda = %.5f' % l) # # Start time for the 5-fold CV # start = time.time() # # Create ridge regression object # knn_reg = linear_model.Ridge(alpha=l) # scores = model_selection.cross_val_score(knn_reg, X_train, y_train, scoring='r2', cv=5) # end = time.time() # t = end - start # print('Ridge: Least CV error: %.2f and time : %.3f' % (np.min(-scores), t)) # start = time.time() # # Create lasso object # regrLasso = linear_model.Lasso(alpha=l) # scores = model_selection.cross_val_score(regrLasso, X_train, y_train, scoring='r2', cv=5) # # Measure and compute time for the 5-fold CV # end = time.time() # t = end - start # print('Lasso: Least CV error: %.2f and time : %.3f' % (np.min(-scores), t)) # print('\n') # # # -------------------- Cross validation for Elastic Net ------------------------# # # Range of hyper-parameters to choose for CV # l1Ratios = [0.1, 0.25, 0.5, 0.75, 0.9] # for l in lambdas: # print('Lambda = %.5f' % l) # for l1R in l1Ratios: # start = time.time() # # Create elastic net object # regrElasNet = linear_model.ElasticNet(alpha=l, l1_ratio=l1R) # scores = model_selection.cross_val_score(regrElasNet, X_train, y_train, scoring='r2', # cv=5) # end = time.time() # t = end - start # print('Elastic Net: l1Ratio = %.2f, Least CV error: %.2f and time : %.3f' % (l1R, np.min(-scores), t)) # print('\n') # ------------- Cross validation for Random Forest Regressor -------------------# # Range of hyper-parameters to choose for CV n_estimator = [1, 2, 5, 10, 20, 35, 50, 100, 200] maxFeatures = [0.25, 0.5, 0.75, 1] maxDepth = [3, 6, 8, 10, 15, 25] for n in n_estimator: for mf in maxFeatures: for d in maxDepth: print('Number of trees/estimators = %d, max depth = %d' % (n, d)) start = time.time() # Create Random Forest Regressor object randFor = RandomForestRegressor(max_depth=d, random_state=0, n_estimators=n, max_features=mf) scores = model_selection.cross_val_score(randFor, X_train, y_train, scoring='r2', cv=5) end = time.time() t = end - start print( 'Random forest regressor: %% of features = %.2f, Least CV error: %.2f and time : %.3f' % (100 * mf, np.min(-scores), t)) print('\n') # ------------- Cross 
validation for regressor using AdaBoost -----------------# # Range of hyper-parameters to choose for CV n_estimator = [1, 2, 5, 10, 20, 35, 50, 100, 200] learning_rate = ['linear', 'square', 'exponential'] maxDepth = [3, 6, 8, 10, 15, 25] for l in learning_rate: for n in n_estimator: for d in maxDepth: print('Number of trees/estimators = %d, max depth = %d' % (n, d)) start = time.time() # Create Boosting Regressor object boosting = AdaBoostRegressor( DecisionTreeRegressor(max_depth=d), random_state=0, n_estimators=n, loss=l) scores = model_selection.cross_val_score(boosting, X_train, y_train, scoring='r2', cv=5, n_jobs=1) end = time.time() t = end - start print( 'Regressor using AdaBoosting: loss type = %s, Least CV error: %.2f and time : %.3f' % (l, np.min(-scores), t)) print('\n') # ---------------- Cross validation for KNeighborsRegressor------------------------# # Range of hyper-parameters to choose for CV lambdas = [2, 3, 4, 5, 6, 7, 8, 9] for l in lambdas: print('Lambda = %.5f' % l) # Start time for the 5-fold CV start = time.time() # Create KNeighborsRegressor object knn_reg = KNeighborsRegressor(n_neighbors=l) scores = model_selection.cross_val_score(knn_reg, X_train, y_train, scoring='r2', cv=5) end = time.time() t = end - start print('n_neighbors: ', l) print('KNeighborsRegressor: Least CV error: %.2f and time : %.3f' % (np.min(-scores), t)) print('\n') # ------------- Cross validation for regressor using XGBoost -----------------# # Range of hyper-parameters to choose for CV # n_estimator = [100, 1000, 10000] # learning_rate = [0.02, 0.05, 0.07, 0.1, 0.2, 0.5, 0.7, 1] # maxDepth = [3, 6, 8, 10, 15, 25] # gamma = [0,0.03,0.1,0.3] # colsample_bytree = [0.4,0.6,0.8] # reg_alpha = [1e-5, 1e-2, 0.75] # reg_lambda = [1e-5, 1e-2, 0.45] # subsample = [0.6,0.95] # min_child_weight = [1.5,6,10] # # for l in learning_rate: # for n in n_estimator: # for d in maxDepth: # for g in gamma: # for c in colsample_bytree: # for alp in reg_alpha: # for lam in reg_lambda: # for s in subsample: # for min_child in min_child_weight: # print('Number of trees/estimators = %d, max depth = %d' % (n, d)) # start = time.time() # # Create Boosting Regressor object # xgb_model = xgboost.XGBRegressor(colsample_bytree=c, # gamma=g, # learning_rate=l, # max_depth=d, # min_child_weight=min_child, # n_estimators=n, # reg_alpha=alp, # reg_lambda=lam, # subsample=s, # seed=42) # scores = model_selection.cross_val_score(xgb_model, X_train, y_train, scoring='r2', # cv=5, n_jobs=1) # end = time.time() # t = end - start # print("Regressor using XGboosting: learning rate = %s, Least CV error: %.2f, " # "gamma =.3f, min_child_weight =.4f, n_estimators = .5f, reg_alpha = " # ".6f, reg_lambda = .7f, subsample= .8f, max_depth = %.9f and time : %.10f" # % (l, np.min(-scores), g, min_child, n, alp, lam, s, d, t)) one_to_left = st.beta(10, 1) from_zero_positive = st.expon(0, 50) params = { "n_estimators": st.randint(100, 1000, 10000), "max_depth": st.randint(3, 40), "learning_rate": st.uniform(0.05, 0.4), "colsample_bytree": one_to_left, "subsample": one_to_left, "gamma": st.uniform(0, 10), 'reg_alpha': from_zero_positive, "min_child_weight": from_zero_positive, } xgbreg = XGBRegressor(nthreads=-1) gs = RandomizedSearchCV(xgbreg, params, n_jobs=1) gs.fit(X_train, y_train) print("Regressor using XGboosting: ", "\nBest Index: ", gs.best_index_, "\nBest estimator: ", gs.best_estimator_, "\nBest Params: ", gs.best_params_) print('\n')
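# Minimal sketch of the pattern used at the end of the block above: scipy frozen
# distributions (including beta) can be passed directly to sklearn's RandomizedSearchCV as
# sampling distributions for hyperparameters. The estimator, dataset, and parameter names
# below are illustrative assumptions, not the original XGBoost setup.
import scipy.stats as st
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import RandomizedSearchCV

X_demo, y_demo = make_regression(n_samples=200, n_features=5, random_state=0)
param_dists = {
    "subsample": st.beta(10, 1),            # skewed toward 1, like one_to_left above
    "learning_rate": st.uniform(0.05, 0.4),
    "n_estimators": st.randint(50, 200),
}
search = RandomizedSearchCV(GradientBoostingRegressor(random_state=0),
                            param_dists, n_iter=5, cv=3, random_state=0)
search.fit(X_demo, y_demo)
print(search.best_params_)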
    pce_values = pce(validation_samples)
    error = np.linalg.norm(pce_values - validation_values, axis=0)
    if not relative:
        error /= np.sqrt(validation_samples.shape[1])
    else:
        error /= np.linalg.norm(validation_values, axis=0)
    return error


np.random.seed(1)

#%%
# Our goal is to demonstrate how to use a polynomial chaos expansion (PCE) to approximate a
# function :math:`f(z): \reals^d \rightarrow \reals` parameterized by the random variables
# :math:`z=(z_1,\ldots,z_d)` with the joint probability density function :math:`\pdf(\V{\rv})`.
# In the following we will use a function commonly used in the literature, the oscillatory
# Genz function. This function is well suited for testing as the number of variables and the
# non-linearity can be adjusted. We define the random variables and the function with the
# following code

univariate_variables = [uniform(), beta(3, 3)]
variable = pya.IndependentMultivariateRandomVariable(univariate_variables)

c = np.array([10, 0.01])
model = GenzFunction("oscillatory", variable.num_vars(), c=c, w=np.zeros_like(c))

#%%
# Here we have intentionally set the coefficients :math:`c` of the Genz function to be highly
# anisotropic, to emphasize the properties of the adaptive algorithm.
#
# A PCE represents the model output :math:`f(\V{\rv})` as an expansion in orthonormal
# polynomials,
#
# .. math::
#
def __init__(self, low=None, peak=None, high=None, gamma=4.0):
    self.a = low
    self.b = peak
    self.c = high
    self.g = gamma
    if self.a is None or self.b is None or self.c is None:
        raise ValueError('Parameters low, peak and high must be specified')
    if self.g <= 0:
        raise ValueError(
            'g parameter should be greater than 0. By default it is 4.0')
    self.range = (self.c - self.a)
    self.mean = round((self.a + (self.g * self.b) + self.c) / (self.g + 2), 4)
    if self.mean == self.b:
        self.alpha = self.beta = 3.0
    else:
        self.alpha = round(
            ((self.mean - self.a) * (2 * self.b - self.a - self.c)) /
            ((self.b - self.mean) * (self.c - self.a)), 4)
        self.beta = round(
            self.alpha * (self.c - self.mean) / (self.mean - self.a), 4)
    self.dist = ss.beta(self.alpha, self.beta, loc=self.a, scale=self.range)
    self.parameters = np.array([self.alpha, self.beta])
    self.median = round(
        (self.a + ((2 + self.g) * self.b) + self.c) / (4 + self.g), 4)
    self.mode = round(self.b, 4)
    self.variance = round(
        ((self.mean - self.a) * (self.c - self.mean)) / (self.g + 4), 4)
    self.skewness = round(
        (2 * (self.beta - self.alpha) * np.sqrt(self.alpha + self.beta + 1)) /
        ((self.alpha + self.beta + 2) * np.sqrt(self.alpha * self.beta)), 4)
    self.kurt = ((self.g + 2) * (
        (((self.alpha - self.beta) ** 2) * (self.alpha + self.beta + 1)) +
        (self.alpha * self.beta * (self.alpha + self.beta + 2)))) / (
        self.alpha * self.beta * (self.alpha + self.beta + 2) *
        (self.alpha + self.beta + 4))
    self.excess_kurtosis = round(
        6 * ((self.alpha - self.beta) ** 2 * (self.alpha + self.beta + 1) -
             (self.alpha * self.beta * (self.alpha + self.beta + 2))) /
        (self.alpha * self.beta * (self.alpha + self.beta + 2) *
         (self.alpha + self.beta + 4)) + 4, 4)
    self.param_title = str('low=' + str(self.a) + ', peak=' + str(self.b) +
                           ', high=' + str(self.c) + ', Gamma=' + str(self.g))
    self.param_title_long = str('Beta Pert (low=' + str(self.a) + ', peak=' +
                                str(self.b) + ', high=' + str(self.c) +
                                ', Gamma=' + str(self.g) + ')')
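# Standalone sanity check of the Beta-PERT moment matching used in the __init__ above
# (the three-point estimate below is illustrative and not taken from the original code).
import numpy as np
import scipy.stats as ss

low, peak, high, g = 2.0, 5.0, 12.0, 4.0
mean = (low + g * peak + high) / (g + 2)
alpha = ((mean - low) * (2 * peak - low - high)) / ((peak - mean) * (high - low))
beta_ = alpha * (high - mean) / (mean - low)
dist = ss.beta(alpha, beta_, loc=low, scale=high - low)
print(dist.mean(), mean)    # moment matching: both ~5.67
print(dist.rvs(size=3))     # samples stay inside [low, high]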
    sampler_lr=1e-2,
    prior_scale=10,
    adversary_weight=0.0,
    num_sample_mc_steps=1000,
    sampler_beta_min=0.02,
    sampler_beta_target=10,
    max_replay=1)

max_resources = 30

if do_search == "halving":
    distributions = dict(
        lr=expon(1e-2),
        sampler_lr=expon(1e-1),
        sampler=["mala", "langevin", "tempered mala", "tempered langevin"],
        weight_decay=expon(1e-3),
        # max_iter=poisson(30),
        replay_prob=beta(a=9, b=1),
        adversary_weight=beta(a=1, b=1),
        num_units=poisson(32),
        num_layers=poisson(3),
        max_replay=poisson(10),
    )
    clf_cv = HalvingRandomSearchCV(clf, distributions, random_state=0, n_jobs=5,
                                   resource="max_iter", max_resources=max_resources)
    search = clf_cv.fit(X.values)
    clf = clf_cv.best_estimator_
elif do_search == "bohb":
    distributions = CS.ConfigurationSpace(seed=42)
def run(): directory = "results/{}".format( datetime.now().strftime('%Y-%m-%d-%H-%M-%S')) if not os.path.exists(directory + '/trajectories/'): os.makedirs(directory + '/trajectories/') env = World(2) # env = MoveWorld() # env = MoveWorldContinuous() state = env.reset() model = EvolutionStrategies(inputs=env.state_dim, outputs=env.action_dim) experience = [] log = [] rewards = deque(maxlen=100) events = deque(maxlen=100) rewards_sat = deque(maxlen=100) events_sat = deque(maxlen=100) rewards_not_sat = deque(maxlen=100) events_not_sat = deque(maxlen=100) sats = [] c_sats = deque(maxlen=100) p_sats = deque(maxlen=100) n_sat = 0 c_sat_verification = 0 for episode in range(params.episodes): reward, n_event, _ = run_episode(model, env) # update c_sat if params.constraint: sat = int(n_event <= params.constraint) if sat: rewards_sat.append(reward) events_sat.append(n_event) else: rewards_not_sat.append(reward) events_not_sat.append(n_event) else: sat = 1 sats.append(sat) n_sat = sum(sats) # += sat successes = n_sat + 1 # incl. prior failures = len(sats) - n_sat + 1 # incl. prior c_sat = 1. - beta(successes, failures).cdf(params.p_req) p_sat = beta(successes, failures).ppf(1 - params.c_req) # direct if params.calibration == 'direct': model.c_sat = c_sat elif params.calibration == 'hard': model.c_sat = 0 if c_sat < params.c_req else 1 elif params.calibration == 'soft': model.c_sat = max(0, c_sat - params.c_req) / (1 - params.c_req) elif params.calibration == 'naive': model.c_sat = max( 0, np.mean(sats) - params.p_req) / (1 - params.p_req) # TODO: move to verify.py if params.verify and constraint is not None: if episode % 1000 == 0: # TODO: get true model: as method in evolution.py v_model = EvolutionStrategies(inputs=env.state_dim, outputs=env.action_dim) for i, param in enumerate(v_model.parameters()): param.data = model.master_weights[i] _, _, c_sat_verification, _, _ = verify(v_model, env) print(c_sat_verification) if params.constraint: model.log_reward(reward, -1 * max(n_event - params.constraint, 0)) else: model.log_reward(reward, 0) # log results rewards.append(reward) events.append(n_event) c_sats.append(c_sat) p_sats.append(p_sat) if episode % model.population_size == 0: log_entry = { 'episode': episode, 'reward': '{0:.2f}'.format(np.mean(rewards)), 'r sat': '{0:.2f}'.format(np.mean(rewards_sat)), 'r not sat': '{0:.2f}'.format(np.mean(rewards_not_sat)), 'events': '{0:.4f}'.format(np.mean(events)), 'e sat': '{0:.4f}'.format(np.mean(events_sat)), 'e not sat': '{0:.4f}'.format(np.mean(events_not_sat)), 'n_sat': '{0:.4f}'.format(np.mean(sats)), 'c_sat': '{0:.4f}'.format(np.mean(c_sats)), 'p_sat': '{0:.4f}'.format(np.mean(p_sats)), 'c_sat_verification': '{0:.4f}'.format(np.mean(c_sat_verification)), 'constraint': params.constraint, 'calibration': params.calibration, 'lr': params.learning_rate } log.append(log_entry) df = pd.DataFrame(log) df.to_csv(directory + '/log.csv') print(log_entry) if params.render: ImgRenderer( directory + '/trajectories/' + str('%.4f' % reward) + '_' + str('%.4f' % n_event) + '_' + str(episode), env).render_img()
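# Small standalone illustration of the satisfaction estimate used in run() above: with a
# Beta(1, 1) prior over the per-episode constraint-satisfaction probability, c_sat is the
# posterior probability that this probability exceeds p_req, and p_sat is a lower bound that
# holds with confidence c_req (the counts and thresholds below are illustrative).
from scipy.stats import beta

n_sat, n_total, p_req, c_req = 90, 100, 0.8, 0.95
posterior = beta(n_sat + 1, n_total - n_sat + 1)   # successes + 1, failures + 1
c_sat = 1. - posterior.cdf(p_req)                  # P(p_satisfaction > p_req | data)
p_sat = posterior.ppf(1 - c_req)                   # quantile-based lower bound
print(c_sat, p_sat)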
def plot_loo_pit( idata=None, y=None, y_hat=None, log_weights=None, ecdf=False, ecdf_fill=True, n_unif=100, use_hdi=False, hdi_prob=None, figsize=None, textsize=None, labeller=None, color="C0", legend=True, ax=None, plot_kwargs=None, plot_unif_kwargs=None, hdi_kwargs=None, fill_kwargs=None, backend=None, backend_kwargs=None, show=None, ): """Plot Leave-One-Out (LOO) probability integral transformation (PIT) predictive checks. Parameters ---------- idata : InferenceData InferenceData object. y : array, DataArray or str Observed data. If str, idata must be present and contain the observed data group y_hat : array, DataArray or str Posterior predictive samples for ``y``. It must have the same shape as y plus an extra dimension at the end of size n_samples (chains and draws stacked). If str or None, idata must contain the posterior predictive group. If None, y_hat is taken equal to y, thus, y must be str too. log_weights : array or DataArray Smoothed log_weights. It must have the same shape as ``y_hat`` ecdf : bool, optional Plot the difference between the LOO-PIT Empirical Cumulative Distribution Function (ECDF) and the uniform CDF instead of LOO-PIT kde. In this case, instead of overlaying uniform distributions, the beta ``hdi_prob`` around the theoretical uniform CDF is shown. This approximation only holds for large S and ECDF values not vary close to 0 nor 1. For more information, see `Vehtari et al. (2019)`, `Appendix G <https://avehtari.github.io/rhat_ess/rhat_ess.html>`_. ecdf_fill : bool, optional Use fill_between to mark the area inside the credible interval. Otherwise, plot the border lines. n_unif : int, optional Number of datasets to simulate and overlay from the uniform distribution. use_hdi : bool, optional Compute expected hdi values instead of overlaying the sampled uniform distributions. hdi_prob : float, optional Probability for the highest density interval. Works with ``use_hdi=True`` or ``ecdf=True``. figsize : figure size tuple, optional If None, size is (8 + numvars, 8 + numvars) textsize: int, optional Text size for labels. If None it will be autoscaled based on figsize. labeller : labeller instance, optional Class providing the method `make_pp_label` to generate the labels in the plot titles. Read the :ref:`label_guide` for more details and usage examples. color : str or array_like, optional Color of the LOO-PIT estimated pdf plot. If ``plot_unif_kwargs`` has no "color" key, an slightly lighter color than this argument will be used for the uniform kde lines. This will ensure that LOO-PIT kde and uniform kde have different default colors. legend : bool, optional Show the legend of the figure. ax: axes, optional Matplotlib axes or bokeh figures. plot_kwargs : dict, optional Additional keywords passed to ax.plot for LOO-PIT line (kde or ECDF) plot_unif_kwargs : dict, optional Additional keywords passed to ax.plot for overlaid uniform distributions or for beta credible interval lines if ``ecdf=True`` hdi_kwargs : dict, optional Additional keywords passed to ax.axhspan fill_kwargs : dict, optional Additional kwargs passed to ax.fill_between backend: str, optional Select plotting backend {"matplotlib","bokeh"}. Default "matplotlib". backend_kwargs: bool, optional These are kwargs specific to the backend being used. For additional documentation check the plotting method of the backend. show : bool, optional Call backend show function. Returns ------- axes : matplotlib axes or bokeh figures References ---------- * Gabry et al. 
(2017) see https://arxiv.org/abs/1709.01449 * https://mc-stan.org/bayesplot/reference/PPC-loo.html * Gelman et al. BDA (2014) Section 6.3 Examples -------- Plot LOO-PIT predictive checks overlaying the KDE of the LOO-PIT values to several realizations of uniform variable sampling with the same number of observations. .. plot:: :context: close-figs >>> import arviz as az >>> idata = az.load_arviz_data("radon") >>> az.plot_loo_pit(idata=idata, y="y") Fill the area containing the 94% highest density interval of the difference between uniform variables empirical CDF and the real uniform CDF. A LOO-PIT ECDF clearly outside of these theoretical boundaries indicates that the observations and the posterior predictive samples do not follow the same distribution. .. plot:: :context: close-figs >>> az.plot_loo_pit(idata=idata, y="y", ecdf=True) """ if ecdf and use_hdi: raise ValueError("use_hdi is incompatible with ecdf plot") if labeller is None: labeller = BaseLabeller() loo_pit = _loo_pit(idata=idata, y=y, y_hat=y_hat, log_weights=log_weights) loo_pit = loo_pit.flatten() if isinstance( loo_pit, np.ndarray) else loo_pit.values.flatten() loo_pit_ecdf = None unif_ecdf = None p975 = None p025 = None loo_pit_kde = None hdi_odds = None unif = None x_vals = None if hdi_prob is None: hdi_prob = rcParams["stats.hdi_prob"] else: if not 1 >= hdi_prob > 0: raise ValueError( "The value of hdi_prob should be in the interval (0, 1]") if ecdf: loo_pit.sort() n_data_points = loo_pit.size loo_pit_ecdf = np.arange(n_data_points) / n_data_points # ideal unnormalized ECDF of uniform distribution with n_data_points points # it is used indistinctively as x or p(u<x) because for u~U(0,1) they are equal unif_ecdf = np.arange(n_data_points + 1) p975 = stats.beta.ppf(0.5 + hdi_prob / 2, unif_ecdf + 1, n_data_points - unif_ecdf + 1) p025 = stats.beta.ppf(0.5 - hdi_prob / 2, unif_ecdf + 1, n_data_points - unif_ecdf + 1) unif_ecdf = unif_ecdf / n_data_points else: x_vals, loo_pit_kde = kde(loo_pit) unif = np.random.uniform(size=(n_unif, loo_pit.size)) if use_hdi: n_obs = loo_pit.size hdi_ = stats.beta(n_obs / 2, n_obs / 2).ppf((1 - hdi_prob) / 2) hdi_odds = (hdi_ / (1 - hdi_), (1 - hdi_) / hdi_) loo_pit_kwargs = dict( ax=ax, figsize=figsize, ecdf=ecdf, loo_pit=loo_pit, loo_pit_ecdf=loo_pit_ecdf, unif_ecdf=unif_ecdf, p975=p975, p025=p025, fill_kwargs=fill_kwargs, ecdf_fill=ecdf_fill, use_hdi=use_hdi, x_vals=x_vals, hdi_kwargs=hdi_kwargs, hdi_odds=hdi_odds, n_unif=n_unif, unif=unif, plot_unif_kwargs=plot_unif_kwargs, loo_pit_kde=loo_pit_kde, textsize=textsize, labeller=labeller, color=color, legend=legend, y_hat=y_hat, y=y, hdi_prob=hdi_prob, plot_kwargs=plot_kwargs, backend_kwargs=backend_kwargs, show=show, ) if backend is None: backend = rcParams["plot.backend"] backend = backend.lower() # TODO: Add backend kwargs plot = get_plotting_function("plot_loo_pit", "loopitplot", backend) axes = plot(**loo_pit_kwargs) return axes
def get_prior(prior, verbose=False):
    prior_lst = []
    initv = []
    lb = []
    ub = []

    if verbose:
        print('Adding parameters to the prior distribution...')

    for pp in prior:
        dist = prior[str(pp)]

        if len(dist) == 3:
            initv.append(None)
            lb.append(None)
            ub.append(None)
            ptype = dist[0]
            pmean = dist[1]
            pstdd = dist[2]
        elif len(dist) == 6:
            if dist[0] == 'None':
                initv.append(None)
            else:
                initv.append(dist[0])
            lb.append(dist[1])
            ub.append(dist[2])
            ptype = dist[3]
            pmean = dist[4]
            pstdd = dist[5]
        else:
            raise NotImplementedError(
                'Shape of prior specification of %s is unclear (!=3 & !=6).' % pp)

        # simply make use of frozen distributions
        if str(ptype) == 'uniform':
            prior_lst.append(ss.uniform(loc=pmean, scale=pstdd - pmean))
        elif str(ptype) == 'normal':
            prior_lst.append(ss.norm(loc=pmean, scale=pstdd))
        elif str(ptype) == 'gamma':
            b = pstdd**2 / pmean
            a = pmean / b
            prior_lst.append(ss.gamma(a, scale=b))
        elif str(ptype) == 'beta':
            a = (1 - pmean) * pmean**2 / pstdd**2 - pmean
            b = a * (1 / pmean - 1)
            prior_lst.append(ss.beta(a=a, b=b))
        elif str(ptype) == 'inv_gamma':

            def targf(x):
                y0 = ss.invgamma(x[0], scale=x[1]).std() - pstdd
                y1 = ss.invgamma(x[0], scale=x[1]).mean() - pmean
                return np.array([y0, y1])

            ig_res = so.root(targf, np.array([4, 4]), method='lm')
            if ig_res['success'] and np.allclose(targf(ig_res['x']), 0):
                prior_lst.append(
                    ss.invgamma(ig_res['x'][0], scale=ig_res['x'][1]))
            else:
                raise ValueError(
                    'Can not find inverse gamma distribution with mean %s and std %s' % (pmean, pstdd))
        elif str(ptype) == 'inv_gamma_dynare':
            s, nu = inv_gamma_spec(pmean, pstdd)
            ig = InvGammaDynare()(s, nu)
            # ig = ss.invgamma(nu/2, scale=s/2)
            prior_lst.append(ig)
        else:
            raise NotImplementedError(' Distribution *not* implemented: ', str(ptype))

        if verbose:
            if len(dist) == 3:
                print(' parameter %s as %s with mean %s and std/df %s...' % (pp, ptype, pmean, pstdd))
            if len(dist) == 6:
                print(' parameter %s as %s (%s, %s). Init @ %s, with bounds (%s, %s)...'
                      % (pp, ptype, pmean, pstdd, dist[0], dist[1], dist[2]))

    return prior_lst, initv, (lb, ub)
tau_1 = .05  # 'Low' treatment effect
tau_2 = .1   # 'High' treatment effect
MDE_bar = [tau_1, tau_2, tau_2 - tau_1]

# Specify mean of village level adoption effect
p_v = .033

# Specify mean of individual level adoption effect
p_i = .017

# Set up distribution for mean village level adoption rates, which will be used
# later to simulate adoption decisions, but is needed now to estimate the
# variance of village level and individual level effects
alpha_v = p_v * 100          # First parameter of the beta distribution
beta_v = 100 - alpha_v       # Second parameter of the beta distribution
F_v = beta(alpha_v, beta_v)  # Full beta distribution

# Set up distribution for individual level adoption rates
alpha_i = p_i * 100
beta_i = 100 - alpha_i
F_i = beta(alpha_i, beta_i)

# Set number of observations to use for variance calculation
nvar = 100000

# Draw sample of individual level parameters
samp_i = F_i.rvs(size=(nvar, 1))

# Convert to Bernoulli random variables
samp_i = np.random.binomial(1, samp_i, size=(nvar, 1))
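# Quick check of the priors defined above: because both use a pseudo-sample size of 100,
# Beta(alpha_v, beta_v) and Beta(alpha_i, beta_i) reproduce the specified mean adoption rates
# (assumes F_v, F_i, p_v, p_i from the block above).
print(F_v.mean(), p_v)   # ~0.033
print(F_i.mean(), p_i)   # ~0.017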
seed_selection_list = [2]
direction = 'python/data/example4/burnin_study/'

for seed_selection_strategy in seed_selection_list:
    for burnin in burn_in_list:
        # file-name
        filename = direction + 'mp_liebscher_N' + repr(N) + \
            '_Nsim' + repr(n_simulations) + '_b' + repr(burnin) + '_' + sampling_method + \
            '_sss' + repr(seed_selection_strategy)

        # parameters for beta-distribution
        p = 6.0
        q = 6.0
        beta_distr = scps.beta(p, q, loc=-2, scale=8)

        # transformation to/from U-space
        phi = lambda x: scps.norm.cdf(x)
        phi_inv = lambda x: scps.norm.ppf(x)

        # CDF = lambda x: scps.beta.cdf(x, p, q)
        CDF = lambda x: beta_distr.cdf(x)
        # CDF_inv = lambda x: scps.beta.ppf(x, p, q)
        CDF_inv = lambda x: beta_distr.ppf(x)

        transform_U2X = lambda u: CDF_inv(phi(u))
        transform_X2U = lambda x: phi_inv(CDF(x))

        # limit-state function
        z = lambda x: 8 * np.exp(-(x[0]**2 + x[1]**2)) + 2 * np.exp(-(
#print(data)
with pm.Model() as model_h:
    alpha = pm.HalfCauchy('alpha', beta=10)
    beta = pm.HalfCauchy('beta', beta=10)
    theta = pm.Beta('theta', alpha, beta, shape=len(N_samples))
    y = pm.Bernoulli('y', p=theta[group_idx], observed=data)
    trace_h = pm.sample(2000)

chain_h = trace_h[200:]
pm.traceplot(chain_h)
pm.summary(chain_h)
plt.savefig('img314.png')
plt.clf()
print(chain_h)

x = np.linspace(0, 1, 100)
for i in np.random.randint(0, len(chain_h), size=100):
    pdf = stats.beta(chain_h['alpha'][i], chain_h['beta'][i]).pdf(x)
    plt.plot(x, pdf, 'g', alpha=0.5)

dist = stats.beta(chain_h['alpha'].mean(), chain_h['beta'].mean())
pdf = dist.pdf(x)
mode = x[np.argmax(pdf)]
mean = dist.moment(1)
plt.plot(x, pdf, label='mode={:.2f}\nmean={:.2f}'.format(mode, mean))
plt.legend(fontsize=14)
plt.xlabel(r'$\theta_{prior}$', fontsize=16)
plt.savefig('img315.Png')
def mean(self):
    return self.successes / (self.successes + self.failures)

#%%
probabilities = [.28, .3, .32]
bandits = [Bandit(p) for p in probabilities]
pulls = [0, 0, 0]
wins = [0, 0, 0]
epsilon = 1

for i in range(1000):
    index = np.argmax([bandit.expectation() for bandit in bandits])
    result = bandits[index].pull()
    pulls[index] += 1
    if result == 1:
        wins[index] += 1
    bandits[index].update(result)

print(pulls)
print(wins)

#%%
n = 10000
A = scs.beta(bandits[0].successes, bandits[0].failures).rvs(n)
B = scs.beta(bandits[2].successes, bandits[2].failures).rvs(n)
print((B - .04 > A).mean())
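# A minimal Bandit class consistent with the usage above -- a sketch, not the original
# definition. It starts from one pseudo success and one pseudo failure (a Beta(1, 1)-style
# prior) so the Beta draw in expectation() is always well defined.
import numpy as np
import scipy.stats as scs

class Bandit:
    def __init__(self, p):
        self.p = p          # true (hidden) win probability
        self.successes = 1  # pseudo-count prior
        self.failures = 1

    def pull(self):
        # simulate one play of the machine
        return np.random.binomial(1, self.p)

    def update(self, result):
        # conjugate update of the success/failure counts
        if result == 1:
            self.successes += 1
        else:
            self.failures += 1

    def expectation(self):
        # Thompson sampling: draw a plausible win rate from the Beta posterior
        return scs.beta(self.successes, self.failures).rvs(1)

    def mean(self):
        return self.successes / (self.successes + self.failures)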
import matplotlib.pyplot as plt

plt.style.use(["seaborn-paper"])

from pysim.information.entropy import marginal_entropy

seed = 123
np.random.seed(seed)
n_samples = 1_000
a = 5
b = 10

# initialize data distributions
data_dist1 = stats.gamma(a=a)
data_dist2 = stats.beta(a=a, b=b)

# get some samples
X1_samples = data_dist1.rvs(size=n_samples)[:, None]
X2_samples = data_dist2.rvs(size=n_samples)[:, None]
X_samples = np.hstack([X1_samples, X2_samples])
assert X_samples.shape[1] == 2

sns.jointplot(X_samples[:, 0], X_samples[:, 1])
plt.show()

# ===========================
# True Entropy
# ===========================
H1_true = data_dist1.entropy()
from IPython.core.pylabtools import figsize
from matplotlib import pyplot as plt
from scipy import stats as st
import numpy as np

visit_A = 1300
visit_B = 1275
conversion_A = 120
conversion_B = 125

alpha = 1
beta = 1
n_samples = 1000

posterior_A = st.beta(alpha + conversion_A, beta + visit_A - conversion_A)
posterior_B = st.beta(alpha + conversion_B, beta + visit_B - conversion_B)

posterior_samples_A = st.beta(alpha + conversion_A, beta + visit_A - conversion_A).rvs(n_samples)
posterior_samples_B = st.beta(alpha + conversion_B, beta + visit_B - conversion_B).rvs(n_samples)

# Monte Carlo estimate of the posterior probability that site A beats site B
print("{}% chance of A site better than B".format(
    100 * (posterior_samples_A > posterior_samples_B).mean()))

figsize(12.5, 4)

# ------------------------------------------------------------------
# Posterior Dist of A and B
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
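# Complementary summary for the A/B posteriors above: posterior means and 95% credible
# intervals read directly off the frozen Beta distributions (assumes posterior_A and
# posterior_B from the block above).
print("A: mean %.4f, 95%% CI %s" % (posterior_A.mean(), posterior_A.interval(0.95)))
print("B: mean %.4f, 95%% CI %s" % (posterior_B.mean(), posterior_B.interval(0.95)))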
X_descriptive = pd.read_pickle(
    "./template/descriptive_stats/X_descriptive_relative_GB.pkl")

test.rename(columns={"Wingate": "Wattbike"}, inplace=True)
features = test.iloc[:, 0:18].columns.tolist()
target = test.iloc[:, 18:].columns.tolist()
test.dropna(inplace=True)
test.reset_index(drop=True, inplace=True)

corr_matrix = test.corr()
corr_matrix = corr_matrix.loc[target, features].T

n = 72
dist = ss.beta(n / 2 - 1, n / 2 - 1, loc=-1, scale=2)
p = 2 * dist.cdf(-abs(corr_matrix))
p = pd.DataFrame(p, columns=target, index=features)

labels = corr_matrix.round(2).astype(str)
p_value = p
for i in labels:
    print(i)
    for index, value in labels[i].items():
        print(index, value)
        if p_value.loc[index, i] <= 0.01:
            labels.loc[index, i] = value + '**'
        elif p_value.loc[index, i] <= 0.05:
            labels.loc[index, i] = value + '*'
        else:
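# Cross-check of the beta-based two-sided p-value used above against scipy.stats.pearsonr on
# a small synthetic sample (the data here are illustrative; both computations use
# beta(n/2 - 1, n/2 - 1) on [-1, 1]).
import numpy as np
import scipy.stats as ss

rng = np.random.default_rng(0)
a_col = rng.normal(size=72)
b_col = 0.3 * a_col + rng.normal(size=72)
r, p_ref = ss.pearsonr(a_col, b_col)
dist_check = ss.beta(72 / 2 - 1, 72 / 2 - 1, loc=-1, scale=2)
p_beta = 2 * dist_check.cdf(-abs(r))
print(p_ref, p_beta)   # the two p-values should agree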
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import bernoulli, beta

# Beta(a, b) parameters for a specified mean and variance
beta_a = lambda mean, var: mean * (mean * (1 - mean) / var - 1)
beta_b = lambda mean, var: (1 - mean) * (mean * (1 - mean) / var - 1)
Beta = lambda mean_p, var_p: beta(beta_a(mean_p, var_p), beta_b(mean_p, var_p))


# Generate a data set of unemployment sequences given entry and exit distributions
def sample_data(N, T, P, Q):
    data = []
    rates = []
    for sample in range(N):
        p, q = P.rvs(), Q.rvs()
        data.extend([(sample, time, spell, timein, unemployed, event)
                     for (time, spell, timein, unemployed, event) in sample_sequence(T, p, q)])
        rates.append((sample, p, q))
    data = pd.DataFrame(data, columns=['sample', 'time', 'spell', 'timein', 'unemployed', 'event'])
    rates = pd.DataFrame(rates, columns=['sample', 'entry', 'exit'])
    return (data, rates)


# Generate a single sequence of observations given entry and exit rates
def sample_sequence(T, enter, exit):
    history = []
    spell = 0
    timein = 1
    enter = 1e-6 if enter < 1e-6 else enter
    exit = 1e-6 if exit < 1e-6 else exit
    steady_state = enter / (enter + exit)
    unemployed = bernoulli.rvs(steady_state)
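# Sanity check of the mean/variance parameterisation above: the distribution built by
# Beta(mean_p, var_p) reproduces the requested moments (the values below are illustrative;
# this assumes the Beta lambda defined in the block above).
check = Beta(0.2, 0.01)
print(check.mean(), check.var())   # ~0.2 and ~0.01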
def main(argv):
    # Get and parse the command line arguments
    image_loc, target_name = get_arguments(argv)

    # Read the input image
    img = cv2.imread(image_loc, 0)

    # Check that the image exists
    if img is None:
        print("Cannot open {} image".format(image_loc))
        print("Make sure you provide the correct image path")
        sys.exit(2)

    # Calculate the input image's histogram
    input_hist = cv2.calcHist([img], [0], None, [256], [0, 256])

    # Normalize the input histogram
    total = sum(input_hist)
    input_hist /= total

    # Calculate the cumulative input histogram
    cum_input_hist = []
    cum = 0.0
    for i in range(len(input_hist)):
        cum += input_hist[i][0]
        cum_input_hist.append(cum)

    # Calculate the variance of the image
    input_img_var = np.var(img)
    # Calculate the variance of the squared image
    input_img_sqr_var = np.var(img**2)

    # Build the target distribution for the requested target
    target_dist = []
    target_hist = None
    if target_name == "uniform":
        # Import the package of the target distribution
        from scipy.stats import uniform
        # Create a uniform distribution object
        unif_dist = uniform(0, 246)
        # Calculate the target distribution
        for i in range(0, 246):
            x = unif_dist.pdf(i)
            target_dist.append(x)
        for i in range(246, 256):
            target_dist.append(0)
        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256, 1))
        for i in range(0, 256):
            target_hist[i][0] = target_dist[i]
    elif target_name == "normal":
        # Import the package of the target distribution
        from scipy.stats import norm
        # Create a standard normal distribution object
        norm_dist = norm(0, 1)
        # Calculate the target distribution
        for i in range(0, 256):
            x = norm_dist.pdf(i / 42.0 - 3)
            target_dist.append(x)
        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256, 1))
        for i in range(0, 256):
            target_hist[i][0] = target_dist[i]
        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total
    elif target_name == "rayleigh":
        # Import the package of the target distribution
        from scipy.stats import rayleigh
        # Create a Rayleigh distribution object
        rayleigh_dist = rayleigh(0.5)
        # Calculate the target distribution
        for i in range(0, 256):
            x = rayleigh_dist.pdf(i / 128.0)
            target_dist.append(x)
        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256, 1))
        for i in range(0, 256):
            target_hist[i][0] = target_dist[i]
        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total
    elif target_name == "gamma":
        # Import the package of the target distribution
        from scipy.stats import gamma
        # Create a gamma distribution object
        gamma_dist = gamma(0.5, 0, 1.0)
        # Calculate the target distribution (the pdf diverges at 0, so use a finite value there)
        target_dist.append(1)
        for i in range(1, 256):
            x = gamma_dist.pdf(i / 256.0)
            target_dist.append(x)
        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256, 1))
        for i in range(0, 256):
            target_hist[i][0] = target_dist[i]
        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total
    elif target_name == "weibull":
        # Import the package of the target distribution
        from scipy.stats import weibull_min
        # Create a Weibull distribution object
        weibull_dist = weibull_min(c=1.4, scale=input_img_var)
        # Calculate the target distribution
        for i in range(0, 256):
            x = weibull_dist.pdf(i / 256.0)
            target_dist.append(x)
        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256, 1))
        for i in range(0, 256):
            target_hist[i][0] = target_dist[i]
        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total
    elif target_name == "beta1":
        # Import the package of the target distribution
        from scipy.stats import beta
        # Create a beta distribution object
        beta_dist = beta(0.5, 0.5)
        # Calculate the target distribution (the pdf diverges at 0 and 1, so use finite values there)
        target_dist.append(6)
        for i in range(1, 255):
            x = beta_dist.pdf(i / 256.0)
            target_dist.append(x)
        target_dist.append(6)
        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256, 1))
        for i in range(0, 256):
            target_hist[i][0] = target_dist[i]
        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total
    elif target_name == "beta2":
        # Import the package of the target distribution
        from scipy.stats import beta
        # Create a beta distribution object
        beta_dist = beta(5, 1)
        # Calculate the target distribution
        for i in range(0, 255):
            x = beta_dist.pdf(i / 256.0)
            target_dist.append(x)
        target_dist.append(6)
        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256, 1))
        for i in range(0, 256):
            target_hist[i][0] = target_dist[i]
        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total
    elif target_name == "lognorm":
        # Import the package of the target distribution
        from scipy.stats import lognorm
        # Create a lognormal distribution object
        lognorm_dist = lognorm(1)
        # Calculate the target distribution
        for i in range(0, 256):
            x = lognorm_dist.pdf(i / 100.0)
            target_dist.append(x)
        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256, 1))
        for i in range(0, 256):
            target_hist[i][0] = target_dist[i]
        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total
    elif target_name == "laplace":
        # Import the package of the target distribution
        from scipy.stats import laplace
        # Create a Laplace distribution object
        laplace_dist = laplace(4)
        # Calculate the target distribution
        target_dist.append(0)
        for i in range(1, 256):
            x = laplace_dist.pdf(i / 256.0)
            target_dist.append(x)
        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256, 1))
        for i in range(0, 256):
            target_hist[i][0] = target_dist[i]
        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total
    elif target_name == "beta3":
        # Import the package of the target distribution
        from scipy.stats import beta
        # Create a beta distribution object
        beta_dist = beta(8, 2)
        # Calculate the target distribution
        for i in range(0, 255):
            x = beta_dist.pdf(i / 256.0)
            target_dist.append(x)
        target_dist.append(0)
        # Calculate the target histogram
        target_hist = np.ndarray(shape=(256, 1))
        for i in range(0, 256):
            target_hist[i][0] = target_dist[i]
        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total
    else:
        # Otherwise the target is itself an image whose histogram we want to match
        target_dist = cv2.imread(target_name, 0)
        # Check that the target image could be read
        if target_dist is None:
            print("{} is not a valid target name (or) image does not exist".format(target_name))
            print("Make sure you give correct target name or correct target image location")
            sys.exit(2)
        # Create the target histogram from the image
        target_hist = cv2.calcHist([target_dist], [0], None, [256], [0, 256])
        # Normalize the target histogram
        total = sum(target_hist)
        target_hist /= total

    # Calculate the cumulative target histogram
    cum_target_hist = []
    cum = 0.0
    for i in range(len(target_hist)):
        cum += target_hist[i][0]
        cum_target_hist.append(cum)

    # Obtain the mapping from the input histogram to the target histogram:
    # each input grey level goes to the target level with the closest CDF value
    lookup = {}
    for i in range(len(cum_input_hist)):
        min_val = abs(cum_target_hist[0] - cum_input_hist[i])
        min_j = 0
        for j in range(1, len(cum_target_hist)):
            val = abs(cum_target_hist[j] - cum_input_hist[i])
            if val < min_val:
                min_val = val
                min_j = j
        lookup[i] = min_j

    # Create the transformed image using the img's pixel values and the lookup table
    trans_img = img.copy()
    for i in range(img.shape[0]):
        for j in range(img.shape[1]):
            trans_img[i][j] = lookup[img[i][j]]

    # Write the transformed image to a png file
    cv2.imwrite('images/transformed.png', trans_img)

    # Show the input image and the transformed image side by side
    input_img_resized = cv2.resize(img, (0, 0), None, 0.25, 0.25)
    trans_img_resized = cv2.resize(trans_img, (0, 0), None, 0.25, 0.25)
    numpy_horiz = np.hstack((input_img_resized, trans_img_resized))
    cv2.imshow('Input image ------------------------ Trans image', numpy_horiz)
    cv2.waitKey(25)

    # Calculate the transformed image's histogram
    trans_hist = cv2.calcHist([trans_img], [0], None, [256], [0, 256])
    # Normalize the transformed image's histogram
    total = sum(trans_hist)
    trans_hist /= total

    # Convert cum_input_hist to a matrix for plotting
    cum_input_hist_matrix = np.ndarray(shape=(256, 1))
    for i in range(0, 256):
        cum_input_hist_matrix[i][0] = cum_input_hist[i]

    # Calculate the cumulative transformed histogram for plotting
    cum_trans_hist = np.ndarray(shape=(256, 1))
    cum = 0.0
    for i in range(0, 256):
        cum += trans_hist[i][0]
        cum_trans_hist[i][0] = cum

    # Convert cum_target_hist to a matrix for plotting
    cum_target_hist_matrix = np.ndarray(shape=(256, 1))
    for i in range(0, 256):
        cum_target_hist_matrix[i][0] = cum_target_hist[i]

    plt.subplot(2, 3, 1)
    plt.title('Original hist')
    plt.plot(input_hist)
    plt.subplot(2, 3, 2)
    plt.title('Original cdf')
    plt.plot(cum_input_hist_matrix)
    plt.subplot(2, 3, 3)
    plt.title('Target pdf')
    plt.plot(target_hist)
    plt.subplot(2, 3, 4)
    plt.title('Transformed hist')
    plt.plot(trans_hist)
    plt.subplot(2, 3, 5)
    plt.title('Transformed cdf')
    plt.plot(cum_trans_hist)
    plt.subplot(2, 3, 6)
    plt.title('Target cdf')
    plt.plot(cum_target_hist_matrix)
    plt.show()
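# The nested search that builds `lookup` above compares every input CDF value with every
# target CDF value in pure Python; purely as an alternative sketch (not the method used
# above), the same CDF-matching step can be done with numpy broadcasting:
import numpy as np

def build_lookup(cum_input_hist, cum_target_hist):
    # |target CDF - input CDF| for every pair of grey levels, then argmin per input level
    diff = np.abs(np.asarray(cum_target_hist)[None, :] -
                  np.asarray(cum_input_hist)[:, None])
    return np.argmin(diff, axis=1).astype(np.uint8)

# With an array lookup table the whole image can be remapped in one step:
#     trans_img = build_lookup(cum_input_hist, cum_target_hist)[img]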
#--------------------------------------------------------------------------------------------------------------
from scipy.stats import beta

# fit a beta distribution to previous CTRs
# (click_through_rates and the zero_to_one bin edges are defined earlier in the notebook)
prior_parameters = beta.fit(click_through_rates, floc=0, fscale=1)
prior_parameters

# extract a, b from the fit
prior_a, prior_b = prior_parameters[0:2]
prior_a
prior_b

# define the prior distribution
prior_distribution = beta(prior_a, prior_b)
prior_distribution

# sample from the prior
prior_samples = prior_distribution.rvs(10000)  # rvs draws pseudorandom samples; here 10,000 of them
prior_samples

# get a histogram of the samples
fit_counts, bins = np.histogram(prior_samples, zero_to_one)
fit_counts

# normalize the histogram
fit_counts = [float(x) / fit_counts.sum() for x in fit_counts]
fit_counts

# plot
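# A hedged follow-up sketch (not part of the original snippet): once the Beta(prior_a, prior_b)
# prior is fitted, new click/impression counts update it conjugately. The counts below are
# hypothetical numbers used only for illustration.
observed_clicks = 25
observed_views = 1000
posterior = beta(prior_a + observed_clicks,
                 prior_b + observed_views - observed_clicks)
print(posterior.mean())          # posterior point estimate of the CTR
print(posterior.interval(0.95))  # 95% credible interval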
plt.colorbar()

x = np.arange(0, .2, 0.0001)
cmap = list(plt.cm.tab10(list(range(len(machines)))))
plt.figure(figsize=(26, 14))

# plot 1: priors before any pulls
n_rounds = 0
en = Environment(machines, payouts, n_rounds)
tsa = ThompsonSampler(env=en)
plt.subplot(231)
for i in range(len(machines)):
    pdf = beta(tsa.a[i], tsa.b[i]).pdf(x)
    c = cmap[i]
    plt.plot(x, pdf, c=c, label=i, alpha=.6)
plt.title("Prior distribution for each variant (uniform between 0 and 1)")
plt.legend()

# plot 2: posteriors after 500 rounds
n_rounds = 500
en = Environment(machines, payouts, n_rounds)
tsa = ThompsonSampler(env=en)
en.run(agent=tsa)
plt.subplot(232)
for i in range(len(machines)):
    pdf = beta(tsa.a[i], tsa.b[i]).pdf(x)
    c = cmap[i]
    plt.plot(x, pdf, c=c, label=i, alpha=.6)
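# Environment, ThompsonSampler, machines and payouts are defined elsewhere in the original
# notebook. A minimal sketch of a Bernoulli Thompson sampler exposing the same a/b attributes
# plotted above (an assumption, not the original implementation) could look like this:
import numpy as np

class SimpleThompsonSampler:
    def __init__(self, n_machines):
        # Beta(1, 1) = uniform prior on each machine's payout probability
        self.a = np.ones(n_machines)
        self.b = np.ones(n_machines)

    def choose(self):
        # One posterior draw per machine; play the machine with the largest draw
        return int(np.argmax(np.random.beta(self.a, self.b)))

    def update(self, machine, reward):
        # Bernoulli reward: success increments a, failure increments b
        self.a[machine] += reward
        self.b[machine] += 1 - reward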
# SCIPY

# Anonymous functions
square = lambda x: x**2
square(2)

from scipy.integrate import quad
quad(lambda x: x**3, 0, 1)

# A histogram and the shape of the distribution
import numpy as np
from scipy.stats import beta
import matplotlib.pyplot as plt

q = beta(5, 5)      # Beta(a, b), with a = b = 5
obs = q.rvs(2000)   # 2000 observations
grid = np.linspace(0.01, 0.99, 100)

fig, ax = plt.subplots()
ax.hist(obs, bins=40, density=True)   # 'normed' was removed from matplotlib; use density
ax.plot(grid, q.pdf(grid), 'k-', linewidth=2)
fig.show()

##########################
## EXERCISE: Employment simulation
##########################
'''
Using US unemployment data, Hamilton [Ham05] estimated the stochastic matrix

    P = 0.971  0.029  0
        0.145  0.778  0.077
        ...
'''
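# The exercise text is cut off above (only the first two rows of Hamilton's matrix survive).
# Independent of the missing entries, a generic sketch for simulating a finite Markov chain
# from any row-stochastic matrix P, with P supplied by the reader, could be:
import numpy as np

def simulate_chain(P, init_state=0, sample_size=1000):
    # Draw a sample path of a Markov chain whose rows of P each sum to 1
    P = np.asarray(P)
    states = np.empty(sample_size, dtype=int)
    states[0] = init_state
    for t in range(1, sample_size):
        states[t] = np.random.choice(P.shape[0], p=P[states[t - 1]])
    return states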
import best_model
import pandas as pd
import scipy.stats as st                    # distributions used for the parameter search below
import feature_engineering_titanic          # project-local preprocessing module
from xgboost import XGBClassifier

if __name__ == '__main__':
    x_train, y_train, x_test = feature_engineering_titanic.read_titanic()
    x_train = x_train.as_matrix()   # .as_matrix() is deprecated in newer pandas; .values is the modern equivalent
    y_train = y_train.as_matrix()
    x_test = x_test.as_matrix()

    # split train validate
    # x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.3, random_state=0)

    # get best model
    one_to_left = st.beta(10, 1)
    from_zero_positive = st.expon(0, 50)
    params = {
        "n_estimators": st.randint(3, 40),
        "max_depth": st.randint(3, 10),
        "learning_rate": st.uniform(0.05, 0.4),
        "colsample_bytree": one_to_left,
        "subsample": one_to_left,
        "gamma": st.uniform(0, 10),
        'reg_alpha': from_zero_positive,
        "min_child_weight": from_zero_positive,
    }
    xgb_clf = XGBClassifier(nthreads=-1)
    best_xgb_model = best_model.get_best_model(x_train,
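    # best_model.get_best_model is project-local and its call above is cut off. Purely as an
    # illustration (not the original helper), the same scipy.stats distributions can be fed
    # straight into scikit-learn's RandomizedSearchCV:
    from sklearn.model_selection import RandomizedSearchCV

    search = RandomizedSearchCV(xgb_clf, param_distributions=params,
                                n_iter=25, cv=3, random_state=0)
    search.fit(x_train, y_train)
    print(search.best_params_)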
24 * 60
sleep.max()   # So somebody slept 23.5 / 24 hours

# It's possible to fit this using a beta distribution
# Beta is only defined on [0, 1]
sleep.min()   # So we'll need to scale it

# All RVs in scipy have parameters for shape, location and scale
stats.beta.fit?
stats.beta.fit(sleep, floc=0, fscale=24 * 60)   # floc means 'fixed location'
bparams = stats.beta.fit(sleep, floc=0, fscale=24 * 60)

# We fix the location and scale, so the fit only has to estimate the shape parameters
# This is the MLE for the alpha and beta parameters
# Now we make a random variable
sbeta = stats.beta(*bparams)
sbeta
sbeta.interval(1)
sbeta.mean()
sleep.mean()

# So sbeta here is the beta distribution fitted to this data
# Let's plot it and make sure that it looks reasonable
x = np.linspace(0, 60 * 24)
h, edges = np.histogram(sleep, 30, density=True)   # 'normed' is deprecated in numpy; use density
plt.bar(edges[:-1], h, width=np.diff(edges))
plt.plot(x, sbeta.pdf(x), linewidth=4, color='orange')
plt.clf()
""" Styles ====== _thumb: .8, .8 """ import numpy as np from scipy import stats import matplotlib.pyplot as plt import arviz as az x = np.linspace(0, 1, 100) dist = stats.beta(2, 5).pdf(x) style_list = [ 'default', ['default', 'arviz-colors'], 'arviz-darkgrid', 'arviz-whitegrid', 'arviz-white' ] fig = plt.figure(figsize=(12, 12)) for idx, style in enumerate(style_list): with az.style.context(style): ax = fig.add_subplot(3, 2, idx + 1, label=idx) for i in range(10): ax.plot(x, dist - i, f'C{i}', label=f'C{i}') ax.set_title(style) ax.set_xlabel('x') ax.set_ylabel('f(x)', rotation=0, labelpad=15) ax.legend(bbox_to_anchor=(1, 1)) plt.tight_layout()
plt.axvline(mle, linestyle="--")
line1, = plt.plot(possible_thetas, likelihoods)

bins = [x / 100 for x in range(100)]
counts, bins = np.histogram(infections_rates, bins=bins)
counts = counts / counts.sum()
line2, = plt.plot(bins[:-1], counts)

plt.xlabel("Theta")
plt.title("Evidence vs Historical Infection Rates")
plt.legend((line1, line2),
           ('Likelihood of Theta with new evidence',
            'Frequency of Theta in last 100 months'),
           loc='upper left')
plt.show()

# Model the data with a beta distribution
prior_a, prior_b = beta.fit(infections_rates, floc=0, fscale=1)[0:2]  # fit the data to find a & b for the beta dist.
prior = beta(prior_a, prior_b)
prior_samples = prior.rvs(10000)  # sample from the prior

beta_sample_counts, bins = np.histogram(prior_samples, bins)
total = beta_sample_counts.sum()
beta_sample_counts = [x / total for x in beta_sample_counts]

plt.figure(figsize=(10, 7))
line1, = plt.plot(bins[:-1], beta_sample_counts)

hist_rates, bins = np.histogram(infections_rates, bins)
total = hist_rates.sum()
hist_rates = [x / total for x in hist_rates]
line2, = plt.plot(bins[:-1], hist_rates)
# Plot Bayesian updates to a beta prior with parameters 1.4, 2.3
from scipy.stats import beta
import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(0, 1, num=100)
plt.plot(x, beta(1.4, 2.3).pdf(x), label="Prior = beta(1.4,2.3)")

for i in range(1, 11, 2):
    plt.plot(x, beta(1.4 + i, 2.3).pdf(x), label="After {} heads".format(i))

for i in range(1, 6, 2):
    plt.plot(x, beta(1.4 + i * 5 + 10, 2.3).pdf(x), label="After {} heads".format(i * 5 + 10))

plt.legend()
plt.title("Updates to prior distribution")
plt.show()
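# The updates above rely on beta-Bernoulli conjugacy: after observing h heads and t tails,
# Beta(a, b) becomes Beta(a + h, b + t). Only heads are added in the plot, so the second
# parameter stays at 2.3. A tiny sketch with hypothetical counts (not part of the original plot):
h, t = 7, 3
posterior = beta(1.4 + h, 2.3 + t)
print(posterior.mean())   # posterior mean after 7 heads and 3 tails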
import numpy as np
from scipy import stats
from scipy import optimize as opt
from scipy.stats import beta, uniform   # beta and uniform distributions
import matplotlib.pyplot as plt
# %matplotlib inline
plt.style.use("ggplot")
np.random.seed(123)

# Target distribution
a, b = 1.5, 2.0
x = np.linspace(beta.ppf(0.001, a, b), beta.ppf(0.999, a, b), 100)   # 100 grid points between the 0.001 and 0.999 quantiles of the beta distribution
plt.plot(x, beta.pdf(x, a, b))

# Find the x that maximizes the beta pdf above
f = beta(a=a, b=b).pdf
res = opt.fmin(lambda x: -f(x), 0.3)   # minimize -f(x) so that the maximization becomes a minimization
y_max = f(res)
y_max

NMCS = 5000
x_mcs = uniform.rvs(size=NMCS)   # uniform.rvs: sampling from the uniform distribution
r = uniform.rvs(size=NMCS) * y_max
accept = x_mcs[r <= f(x_mcs)]

plt.hist(accept, bins=30, rwidth=0.8, label="rejection sampling")
x = np.linspace(beta.ppf(0.001, a, b), beta.ppf(0.999, a, b), 100)
plt.plot(x, beta.pdf(x, a, b), label="Target dist")
plt.legend()
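# Sanity check (not in the original): with a Uniform(0, 1) proposal and a constant envelope of
# height y_max over [0, 1], the theoretical acceptance rate of this rejection sampler is
# 1 / y_max, so the empirical rate from the run above should land close to it.
acc_rate = len(accept) / NMCS
print(acc_rate, 1.0 / y_max)   # empirical vs. theoretical acceptance rate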
def main():
    from KDEpy import FFTKDE, NaiveKDE
    from KDEpy.binning import linear_binning
    import matplotlib.pyplot as plt
    from scipy import stats
    import numpy as np

    np.random.seed(123)
    dist = stats.lognorm(1, 1)
    plt.figure(figsize=(14, 6))
    kernel = 'triweight'

    N = 10**3
    data = dist.rvs(int(N))
    plt.scatter(data, np.zeros_like(data), marker='|')
    x, y = NaiveKDE(bw='silverman', kernel=kernel).fit(data)(2**10)
    plt.plot(x, y, label='FFTKDE')
    plt.plot(x, dist.pdf(x), label='True')

    # -----------------------------------------------------------------------
    # Adaptive bandwidth
    alpha = 1.9
    bw = 'silverman'
    kde = NaiveKDE(kernel='epa', bw=bw)
    kde.fit(data)(x)
    # y = NaiveKDE(bw=kde.bw * lambda_i).fit(x, weights=binned_data * lambda_i)(x)
    # plt.plot(x, y + np.ones_like(x) * 0.00, label='Adaptive')

    # The FFTKDE grid may be wrong, but the true density cannot be
    # smaller than (1/N) K(0) at a given point
    min_kde = (1 / int(N)) * kde.kernel(0)
    kde_data = np.maximum(min_kde, kde(data))
    kde_data = kde(data)
    bw = kde.bw * ((kde_data) / stats.mstats.gmean(kde_data))**-alpha
    print(np.min(kde(data)))
    print(stats.mstats.gmean(kde(data)))
    print(kde.bw, bw)
    # bw = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) / 100
    plt.scatter(data, kde_data)
    y = NaiveKDE(bw=bw, kernel=kernel).fit(data, weights=None)(x)
    plt.plot(x, kde.bw * ((kde(x) + 0) / stats.mstats.gmean(kde_data))**-alpha, label='bw')
    plt.plot(x, y + np.ones_like(x) * 0.00, label='Adaptive')
    plt.ylim([0, 0.7])
    plt.legend()
    plt.show()

    print('-' * 32)

    # -----------------------------------------------------------------------
    # Mirror at bounds (the code below actually handles the boundary with a
    # log transform of the data rather than by mirroring)
    plt.figure(figsize=(14, 6))

    # Beta distribution, where x = 0 is a hard lower limit (support is [0, 1])
    dist = stats.beta(a=1.05, b=3, loc=0, scale=1)

    # Plot the normal KDE and the true density
    data = dist.rvs(10**2)
    plt.figure(figsize=(14, 6))
    kde = FFTKDE(bw='silverman', kernel='triweight')
    x, y = kde.fit(data)(2**10)
    plt.figure(figsize=(14, 6))
    plt.plot(x, dist.pdf(x), label='True')
    plt.plot(x, y, label='FFTKDE')
    plt.scatter(data, np.zeros_like(data), marker='|')
    print(np.min(data), np.max(data))

    data_transformed = np.log(data)
    plt.scatter(data_transformed, np.zeros_like(data_transformed), marker='|')
    kde = FFTKDE(bw='silverman', kernel='triweight')
    x, y = kde.fit(data_transformed)(2**10)
    plt.plot(x, y, label='FFTKDE - transformed')
    print(x)
    print(y)
    plt.plot(np.exp(x), 2 * np.exp(y) * (1 + y) - 2)
    plt.ylim([0, 3])
    plt.xlim([-1, 4])
    plt.legend()
    plt.show()

    # -------------------------------------------------------------------------
    # Data on a circle: mixture of three well-separated normals
    np.random.seed(123)
    dist1 = stats.norm(loc=0, scale=1)
    dist2 = stats.norm(loc=20, scale=1)
    dist3 = stats.norm(loc=40, scale=1)
    data = np.hstack([dist1.rvs(10**3), dist2.rvs(10**3), dist3.rvs(10**3)])
    plt.figure(figsize=(14, 6))

    x, y = FFTKDE(bw='silverman').fit(data)()
    plt.plot(x, (dist1.pdf(x) + dist2.pdf(x) + dist3.pdf(x)) / 3, label='True distribution')
    plt.plot(x, y, label="FFTKDE with Silverman's rule")

    y = FFTKDE(bw='ISJ').fit(data)(x)
    plt.plot(x, y, label="FFTKDE with Improved Sheather Jones (ISJ)")
    plt.legend()
    plt.show()
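# The "mirror at bounds" idea named above is only hinted at in the original, which uses a log
# transform instead. A minimal sketch of actual mirroring (an assumption, not KDEpy's built-in
# behaviour): reflect the data about the boundary, fit the KDE on the augmented sample, and
# keep the in-domain part with its mass doubled.
from KDEpy import FFTKDE
import numpy as np

def mirrored_kde(data, lower=0.0, grid_points=2**10):
    reflected = 2 * lower - data                 # mirror the sample about the lower bound
    augmented = np.concatenate([data, reflected])
    x, y = FFTKDE(bw='silverman').fit(augmented)(grid_points)
    keep = x >= lower
    return x[keep], 2 * y[keep]                  # double the density to restore unit mass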