def select_arm(self):
    # self.a = np.argmax(pymc.rbeta(1 + self.wins, 1 + self.k_n - self.wins))
    ts_values = pymc.rbeta(1 + self.wins, 1 + self.k_n - self.wins)
    max_p_t = np.max(ts_values)
    max_idxs = np.argwhere(ts_values == max_p_t).flatten()
    self.a = np.random.choice(max_idxs)
    return
def generate_and_append_data(data, data_type, truth, age_intervals,
                             gbd_region='Asia, Southeast', country='Thailand',
                             year=2005, sex='male'):
    """ create simulated data"""
    for a0, a1 in age_intervals:
        d = {'condition': 'type_2_diabetes',
             'data_type': data_type,
             'gbd_region': gbd_region,
             'region': country,
             'year_start': year,
             'year_end': year,
             'sex': sex,
             'age_start': a0,
             'age_end': a1,
             'age_weights': list(np.ones(a1 + 1 - a0)),
             'id': len(data)}

        p0 = dismod3.utils.rate_for_range(truth, range(a0, a1 + 1), np.ones(a1 + 1 - a0))
        p1 = mc.rbeta(p0 * dispersion, (1 - p0) * dispersion)
        p2 = mc.rbinomial(n, p1) / n

        d['value'] = p2
        d['standard_error'] = np.sqrt(p2 * (1 - p2) / n)

        data.append(d)
def sample(self, n=1):
    score = np.zeros(n)
    choices = np.zeros(n)
    regret = np.zeros(n)

    for k in range(n):
        # sample from the bandits' priors, and select the largest sample
        choice = np.argmax(rbeta(1 + self.wins, 1 + self.trials - self.wins))

        # sample the chosen bandit
        result = self.bandits.pull(choice)

        # update stats (which updates priors for next pull)
        self.update_stats(k, choice, result)

        score[k] = result
        choices[k] = choice
        regret[k] = self.bandits.p[self.bandits.optimal] - self.bandits.p[choice]

    self.score = np.r_[self.score, score]
    self.choices = np.r_[self.choices, choices]
    self.regret = np.r_[self.regret, regret]
    return
def select_arm(self):
    n_arms = len(self.counts)
    # play any arm that has not been tried yet before falling back to Thompson sampling
    for arm in range(n_arms):
        if self.counts[arm] == 0:
            return arm
    val = np.array(self.values)
    count = np.array(self.counts)
    return np.argmax(rbeta(1 + val, 1 + count - val))
def select_arm(self):
    ts_values = pymc.rbeta(1 + self.wins, 1 + self.k_n - self.wins)
    # randomly break ties: np.argmax returns the first occurrence of the maximum,
    # so get all occurrences of the max and randomly select between them
    max_p_t = np.max(ts_values)
    max_idxs = np.argwhere(ts_values == max_p_t).flatten()
    self.a = np.random.choice(max_idxs)
    return
def thompsonExp():
    for i in range(1, exp_count):
        b = pymc.rbeta(1 + wins, 1 + trials - wins)
        item = np.argmax(b)
        trials[item] += 1
        # rewrite of the judgment code
        result = np.random.binomial(n=1, p=arm_probs[item])
        if result == 1:
            wins[item] += 1
def run(mu1, mu2, mu3, N=10000):
    bandits = [TSBandit(mu1), TSBandit(mu2), TSBandit(mu3)]
    data = np.empty(N)  # N uninitialized values
    n_arm = len(bandits)
    for i in range(N):
        j = np.argmax([pymc.rbeta(1 + b.wins, 1 + b.lose) for b in bandits])
        x = bandits[j].pull()
        bandits[j].update(x)
        data[i] = x
    cumul_average = np.cumsum(data) / (np.arange(N) + 1)
    return cumul_average
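run() above relies on a TSBandit class that exposes wins, lose, pull(), and update(). That class is not shown in the snippet; the following is a minimal sketch of what it could look like, assuming Bernoulli rewards:

import numpy as np

class TSBandit(object):
    """Sketch of the bandit arm assumed by run() above (assumption: Bernoulli rewards)."""
    def __init__(self, p):
        self.p = p      # true (hidden) success probability of this arm
        self.wins = 0   # number of successful pulls
        self.lose = 0   # number of failed pulls

    def pull(self):
        # draw a Bernoulli reward from the arm's true probability
        return 1 if np.random.random() < self.p else 0

    def update(self, x):
        # update the Beta-posterior counts with the observed reward
        if x:
            self.wins += 1
        else:
            self.lose += 1

With such a class, run(0.2, 0.5, 0.75) returns a cumulative-average curve that should converge toward the best arm's mean.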
def test(self, function_g=lambda x: x, function_b=lambda x: 1, seed=2):
    '''function_g and function_b are the functions on number of goods or
    number of bads to add to the beta draw. Default is H-test if GOOD, or 1 if BAD'''
    # np.random.seed(seed)
    # pm.numpy.random.seed(seed)
    self.name = "Langsam Adaptive Bayesian"
    l = 10
    powers = np.arange(0, l, 1)
    # sizes = 2**powers
    sizes = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30])
    l = len(sizes)
    Gs = np.zeros(l)
    Bs = np.zeros(l)
    choice_history = []
    f = open('./test.txt', 'w')
    f2 = open('./draws.txt', 'w')

    while self.queue.size > 0:
        draw = rbeta(1 + np.log2(sizes) + Gs, 1 + Bs)
        # f2.write("{}\n\n".format(draw))
        draw = np.argmax(draw)
        h_now = sizes[draw]
        choice_history.append(h_now)
        f.write("{} ".format(h_now))

        # Update histories
        self.update(h_now)
        q_now = self.q_history[-1]
        items, self.queue = np.split(self.queue, [h_now])
        if all(items):
            self.G += items.size
            # Gs[draw] += function_g(items.size)
            Gs[draw] += (draw + 1) / 10
            f.write("SUCCESS ")  # Update
        else:
            f.write("_______ ")
            self.B += 1
            self.g_test(items)
            # Bs[draw] += function_b(items.size)
            Bs[draw] += 1 / (1 + draw)

        f.write("({:.3f},{:.3f})\n".format((np.log2(sizes) + 1 + Gs)[draw], 1 + Bs[draw]))

    f.close()
    f2.close()
    # print(sizes, '\n', 1 + Gs, '\n', 1 + Bs)
    # print(np.histogram(choice_history, bins=sizes)[0])
    # print(choice_history)
    # print()
    self.choices = choice_history
def sample_bandits(self, n=1):
    bb_score = np.zeros(n)
    choices = np.zeros(n)

    for k in range(n):
        choice = np.argmax(rbeta(self.wins, self.trials - self.wins))

        result = self.bandits.pull(choice)

        self.wins[choice] += result
        self.trials[choice] += 1
        bb_score[k] = result
        choices[k] = choice
        self.N += 1

    self.bb_score = np.r_[self.bb_score, bb_score]
    self.choices = np.r_[self.choices, choices]
    return
def choose(self):
    """ Make decision """
    clicks = []
    publishs = []
    skips = []
    for i in range(len(self.action_names)):
        clicks.append(self.action_record[self.action_names[i]][KEY_CLICK])
        publishs.append(self.action_record[self.action_names[i]][KEY_PUBLISH])
    clicks = np.array(clicks)
    publishs = np.array(publishs)
    skips = publishs - clicks

    probs = []
    for click, skip in zip(clicks, skips):
        probs.append(pm.rbeta(click, skip))
    probs = np.array(probs)
    print(probs)
    index = np.argmax(probs)
    return self.action_names[index]
def plot_beta_binomial_funnel(alpha, beta):
    pi_true = alpha / (alpha + beta)
    pi = mc.rbeta(alpha, beta, size=10000)

    n = pl.exp(mc.rnormal(10, 2**-2, size=10000))
    k = mc.rbinomial(pl.array(n, dtype=int), pi)
    r = k / n

    pl.vlines([pi_true], .1 * n.min(), 10 * n.max(),
              linewidth=2, linestyle='-', color='w', zorder=9)
    pl.vlines([pi_true], .1 * n.min(), 10 * n.max(),
              linewidth=1, linestyle='--', color='black', zorder=10)
    pl.plot(r, n, 'ko', mew=0, alpha=.25)

    pl.semilogy(schiz['r'], schiz['n'], 'ks', mew=1, mec='white', ms=4,
                label='Observed values')

    pl.xlabel('Rate (per PY)')
    pl.ylabel('Study size (PY)')
    pl.xticks([0, .005, .01])
    pl.axis([-.0001, .0101, 50., 1500000])
    pl.title(r'$\alpha=%d$, $\beta=%d$' % (alpha, beta))
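plot_beta_binomial_funnel assumes pymc imported as mc, pylab imported as pl, and a global schiz dataset with observed rates 'r' and study sizes 'n'. A minimal sketch of calling it; the schiz values below are placeholders so the call runs, not real data, and the alpha, beta arguments are arbitrary:

import pylab as pl
import pymc as mc

# placeholder "observed" values so the overlay plot has something to draw; not real data
schiz = {'r': pl.array([.002, .005, .008]),
         'n': pl.array([1000., 5000., 20000.])}

plot_beta_binomial_funnel(alpha=2, beta=300)  # hypothetical beta-binomial parameters
pl.show()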
def sample_bandits(self, n=1):
    bb_score = np.zeros(n)
    choices = np.zeros(n)

    for k in range(n):
        # sample from the bandits' priors, and select the largest sample
        choice = np.argmax(rbeta(1 + self.wins, 1 + self.trials - self.wins))

        # sample the chosen bandit
        result = self.bandits.pull(choice)

        # update priors and score
        self.wins[choice] += result
        self.trials[choice] += 1
        bb_score[k] = result
        self.N += 1
        choices[k] = choice

    self.bb_score = np.r_[self.bb_score, bb_score]
    self.choices = np.r_[self.choices, choices]
    return
def f(sp_sub, x, a, b):
    p = pm.invlogit(sp_sub(x))
    h = pm.rbeta(a, b, size=len(p))
    return g6pd.p_fem_def(p, h)
def f(sp_sub, a, b, n=n):
    p = pm.invlogit(sp_sub)
    h = pm.rbeta(a, b, size=len(sp_sub))
    p_def = g6pd.p_fem_def(p, h)
    # NOTE: p_def is computed but not used below; the intended argument may be p=p_def
    return pm.rbinomial(n=n, p=p)
def policy(self, experiment, k):
    return np.argmax(rbeta(experiment.success + self.alpha,
                           experiment.failure + self.beta))
def bayesian_bandit_choice(self):
    return np.argmax(rbeta(1 + self.wins, 1 + self.trials - self.wins))
def Thompson_sampling(estimated_beta_params):
    totals = estimated_beta_params.sum(1)  # the number of experiments per arm
    successes = estimated_beta_params[:, 0]
    return np.argmax(pymc.rbeta(1 + successes, 1 + totals - successes))
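With Thompson_sampling defined as above, a minimal usage sketch: the function reads successes from column 0 and the per-arm total from the row sum, so the layout below (column 0 = successes, column 1 = failures) is an assumption consistent with that; the counts themselves are hypothetical.

import numpy as np
import pymc

# hypothetical counts for three arms: column 0 = successes, column 1 = failures
estimated_beta_params = np.array([[12., 88.],
                                  [25., 75.],
                                  [30., 70.]])

arm = Thompson_sampling(estimated_beta_params)  # index of the arm to pull next
print(arm)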
def thompson_sample_desc(estimated_beta_params):
    successes = estimated_beta_params[:, 0]
    totals = estimated_beta_params.sum(1)
    # argmin picks the arm with the smallest posterior draw (the "descending" variant)
    choice = numpy.argmin(pymc.rbeta(1 + successes, 1 + totals - successes))
    return choice
def pred(alpha=alpha, beta=beta, phi=phi):
    if pl.rand() < phi:
        return 0
    else:
        return mc.rbinomial(n_pred, mc.rbeta(alpha, beta)) / float(n_pred)
import numpy as np
import pymc

"""
Python implementation of Thompson sampling
"""

successes = 10
totals = 100
np.argmax(pymc.rbeta(1 + successes, 1 + totals - successes))
def fem_def(sp_sub, a, b):
    homo = male_def(sp_sub)
    hetero = hw_hetero(sp_sub)
    het_def = pm.rbeta(a, b)
    hetero *= het_def
    return hetero + homo
def pred(alpha=alpha, beta=beta):
    return mc.rbinomial(n_pred, mc.rbeta(alpha, beta)) / float(n_pred)
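pred above draws a single beta-binomial predictive rate. A sketch of building a predictive distribution by repeating that draw; n_pred and the alpha, beta values here are hypothetical stand-ins for whatever the surrounding model defines:

import numpy as np
import pymc as mc

n_pred = 1000            # hypothetical predictive sample size
alpha, beta = 2., 300.   # hypothetical beta-binomial parameters

# repeat the same beta-binomial draw as pred() to approximate the predictive distribution
draws = np.array([mc.rbinomial(n_pred, mc.rbeta(alpha, beta)) / float(n_pred)
                  for _ in range(5000)])
print(draws.mean(), draws.std())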
def bayesian_bandit(self):
    alpha = 1 + self.wins
    b = 1 + self.trials - self.wins  # beta parameter: one plus your losses
    return np.argmax(rbeta(alpha, b))
import numpy as np
import pymc

# wins and trials are N-dimensional vectors, where N is the number of bandit arms;
# each element records that arm's win count and pull count respectively
choice = np.argmax(pymc.rbeta(1 + wins, 1 + trials - wins))
wins[choice] += 1       # shown here for an observed win on the chosen arm
trials[choice] += 1     # only the pulled arm's trial count is incremented
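The two update lines above show the bookkeeping after a win; a self-contained sketch of the full loop, where the true success rates arm_probs and the number of rounds are hypothetical, might look like:

import numpy as np
import pymc

arm_probs = np.array([0.04, 0.06, 0.10])  # hypothetical true success rates
n_arms = len(arm_probs)
wins = np.zeros(n_arms)
trials = np.zeros(n_arms)

for _ in range(5000):
    # draw one sample from each arm's Beta posterior and play the best-looking arm
    choice = np.argmax(pymc.rbeta(1 + wins, 1 + trials - wins))
    reward = np.random.binomial(1, arm_probs[choice])  # simulate the outcome
    wins[choice] += reward
    trials[choice] += 1

print(trials)  # most pulls should concentrate on the best arm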
def DoSamplingIteration(self, y):
    # Setup
    try:
        xLast = self.xSeq[-1]
        varLast = self.varianceSeq[-1]
    except:
        raise NameError('Cannot access xLast and/or varLast')

    # n0 = #{ i : xLast[i] == 0} whereas n1 = ||xLast||_0
    n0, n1 = NumericalHelper.CalculateNumZerosNonzeros(xLast, self.Eps)

    # Sample to get w|xLast and a|xLast,alpha
    wSample = pymc.rbeta(1 + n1, 1 + n0)
    assert (wSample >= 0) and (wSample <= 1), __file__ + ': wSample is out of bounds'
    logging.info(" Samp. Iter. {0}, generating wSample ~ Beta({1},{2}) ... {3:.5f}".format(
        self._samplerIter, 1 + n1, 1 + n0, wSample))

    igShapeForA = n1 + self.hyperparameterPriorDict['alpha0']
    xLastL1Norm = np.sum(np.abs(xLast))
    igScaleForA = xLastL1Norm + self.hyperparameterPriorDict['alpha1']
    assert (igShapeForA > 0) and (igScaleForA > 0), "IG shape and scale aren't strictly positive"

    bSampleGenerated = False
    aSample = None
    for tryInd in range(5, 0, -1):
        try:
            aSample = pymc.rinverse_gamma(igShapeForA, igScaleForA)
            bSampleGenerated = True
        except (ZeroDivisionError, OverflowError) as e:
            logging.error("Couldn't generate aSample ~ IG({0},{1}) using n0={2}, n1={3}: {4}".format(
                igShapeForA, igScaleForA, n0, n1, e.message))
            # Only raise the exception if repeated attempts failed
            # NOTE: range(5, 0, -1) never yields 0, so as written this raise is unreachable
            if tryInd == 0:
                raise
        if bSampleGenerated is True:
            break
    logging.info(" Samp. Iter. {0}, generating aSample ~ IG({1:.4f},{2:.4f}) ... {3:.4e}".format(
        self._samplerIter, igShapeForA, igScaleForA, aSample))
    self.hyperparameterSeq.append({'w': wSample, 'a': aSample})

    # Sample to get x_i, 1 <= i <= M. The method DoSamplingXConditionedAll updates self.xSeq and self._mappedX
    xNext, hxNext = self.DoSamplingXConditionedAll(y, wSample, aSample, varLast, xLast,
                                                   self.hx, self.nVerbose > 0)

    # Sample to get variance
    yErr = y - hxNext[:, 0]
    igShapeForVariance = y.size / 2
    igScaleForVariance = np.sum(yErr * yErr) / 2
    varianceSample = pymc.rinverse_gamma(igShapeForVariance, igScaleForVariance)
    logging.info(" Samp. Iter. {0}, generating varianceSample ~ IG({1:.4f},{2:.4f}) ... {3:.4e}".format(
        self._samplerIter, igShapeForVariance, igScaleForVariance, varianceSample))
    self.varianceSeq.append(varianceSample)

    self.iterObserver.UpdateState({
        McmcIterationEvaluator.STATE_KEY_COUNT_ITER: self._samplerIter,
        McmcIterationEvaluator.STATE_KEY_X_ITER: xNext,
        McmcIterationEvaluator.STATE_KEY_HX_ITER: hxNext,
        McmcIterationEvaluator.STATE_KEY_W_ITER: wSample,
        McmcIterationEvaluator.STATE_KEY_A_ITER: aSample,
        McmcIterationEvaluator.STATE_KEY_NOISEVAR_ITER: varianceSample
    })

    self.xSeq.append(xNext)
    assert len(self.xSeq) == len(self.varianceSeq), \
        __file__ + ': DoSamplingIteration: seq length mismatch #1'
    assert len(self.xSeq) == (len(self.hyperparameterSeq) + 1), \
        __file__ + ': DoSamplingIteration: seq length mismatch #2'
    self.hx = hxNext
"""
@contact: [email protected]
@file: beta.py
@time: 2018/1/22 19:27
@desc: plot of the beta distribution
"""
import pymc
from scipy.stats import beta
import matplotlib.pyplot as plt
import numpy as np

a, b = 32, 199

print(pymc.rbeta(1 + a, b))
print(pymc.rbeta(1 + a, b))
print(pymc.rbeta(1 + a, b))
print(pymc.rbeta(1 + a, b + 1))
print(pymc.rbeta(1 + a, b + 1))
print(pymc.rbeta(1 + a, b + 1))

mean, var, skew, kurt = beta.stats(a, b, moments='mvsk')
print(beta.pdf(0.1, a, b))

x = np.linspace(0, 1, 100)
plt.plot(x, beta.pdf(x, a, b), 'r-', lw=5, alpha=0.6, label='beta pdf')
plt.show()