示例#1
0
文件: stackplot.py 项目: evamush/MAB
 def select_arm(self):
     """Draw one Beta sample per arm and store the winning index in ``self.a``.

     The Beta parameters are ``1 + self.wins`` and
     ``1 + self.k_n - self.wins``. When several arms share the largest draw,
     one of them is chosen with ``np.random.choice`` instead of always
     taking the first one, which is what ``np.argmax`` would do.

     Returns:
         None. The selected index is written to ``self.a``.
     """
     alpha = 1 + self.wins
     beta = 1 + self.k_n - self.wins
     draws = pymc.rbeta(alpha, beta)
     top_draw = np.max(draws)
     tied_arms = np.argwhere(draws == top_draw).flatten()
     self.a = np.random.choice(tied_arms)
示例#2
0
def generate_and_append_data(data,
                             data_type,
                             truth,
                             age_intervals,
                             gbd_region='Asia, Southeast',
                             country='Thailand',
                             year=2005,
                             sex='male'):
    """Append one simulated data row per age interval to ``data``.

    For every ``(age_start, age_end)`` pair a rate is computed from ``truth``
    with ``dismod3.utils.rate_for_range`` (uniform age weights), perturbed
    by a Beta draw and then by a Binomial draw, and stored as the row's
    ``'value'`` together with a binomial standard error.

    ``dispersion`` and ``n`` are not parameters; they are read from the
    enclosing scope.

    Args:
        data: List that receives the new row dicts; mutated in place.
        data_type: Stored under the ``'data_type'`` key of every row.
        truth: Passed through to ``dismod3.utils.rate_for_range``.
        age_intervals: Iterable of inclusive ``(age_start, age_end)`` pairs.
        gbd_region: Stored under ``'gbd_region'``.
        country: Stored under ``'region'``.
        year: Stored under both ``'year_start'`` and ``'year_end'``.
        sex: Stored under ``'sex'``.
    """
    for age_start, age_end in age_intervals:
        n_ages = age_end + 1 - age_start

        row = {
            'condition': 'type_2_diabetes',
            'data_type': data_type,
            'gbd_region': gbd_region,
            'region': country,
            'year_start': year,
            'year_end': year,
            'sex': sex,
            'age_start': age_start,
            'age_end': age_end,
            'age_weights': list(np.ones(n_ages)),
            # The id is the row's position in ``data`` before it is appended.
            'id': len(data),
        }

        true_rate = dismod3.utils.rate_for_range(
            truth, range(age_start, age_end + 1), np.ones(n_ages))
        noisy_rate = mc.rbeta(true_rate * dispersion,
                              (1 - true_rate) * dispersion)
        observed = mc.rbinomial(n, noisy_rate) / n

        row['value'] = observed
        row['standard_error'] = np.sqrt(observed * (1 - observed) / n)

        data.append(row)
示例#3
0
    def sample(self, n=1):
        """Run ``n`` rounds of Beta-draw arm selection and record the outcomes.

        Each round draws from ``rbeta(1 + wins, 1 + trials - wins)``, pulls
        the arm with the largest draw, forwards the outcome to
        ``self.update_stats`` and records the reward, the chosen arm and the
        regret (probability of the optimal arm minus that of the chosen arm).

        Args:
            n: Number of rounds to play.

        Returns:
            None. The per-round arrays are appended to ``self.score``,
            ``self.choices`` and ``self.regret``.
        """
        rewards = np.zeros(n)
        picked = np.zeros(n)
        regrets = np.zeros(n)

        for step in range(n):
            # One draw per arm; the arm with the largest draw is played.
            alpha = 1 + self.wins
            beta = 1 + self.trials - self.wins
            arm = np.argmax(rbeta(alpha, beta))

            outcome = self.bandits.pull(arm)

            # Updating the statistics changes the parameters of the next draw.
            self.update_stats(step, arm, outcome)

            rewards[step] = outcome
            picked[step] = arm
            best_p = self.bandits.p[self.bandits.optimal]
            regrets[step] = best_p - self.bandits.p[arm]

        self.score = np.r_[self.score, rewards]
        self.choices = np.r_[self.choices, picked]
        self.regret = np.r_[self.regret, regrets]
 def select_arm(self):
     """Return the index of the arm to play next.

     Any arm whose entry in ``self.counts`` is still zero is returned first
     (lowest index wins). Once every arm has been tried, one Beta sample is
     drawn per arm with parameters ``1 + values`` and
     ``1 + counts - values`` and the index of the largest sample is returned.
     """
     untried = next(
         (arm for arm in range(len(self.counts)) if self.counts[arm] == 0),
         None)
     if untried is not None:
         return untried

     successes = np.array(self.values)
     pulls = np.array(self.counts)
     draws = rbeta(1 + successes, 1 + pulls - successes)
     return np.argmax(draws)
示例#5
0
 def select_arm(self):
     """Sample every arm from its Beta distribution and store the pick in ``self.a``.

     The draw uses ``rbeta(1 + wins, 1 + k_n - wins)``. ``np.argmax`` would
     always return the first occurrence of the maximum, so all indices that
     attain the maximum are collected and one of them is picked with
     ``np.random.choice``.

     Returns:
         None. The selected index is written to ``self.a``.
     """
     samples = pymc.rbeta(1 + self.wins, 1 + self.k_n - self.wins)
     is_best = samples == np.max(samples)
     candidates = np.argwhere(is_best).flatten()
     self.a = np.random.choice(candidates)
     return None
def thompsonExp():
    """Run the Beta-draw bandit experiment on the module-level counters.

    Loops over ``range(1, exp_count)`` (so ``exp_count - 1`` rounds). Each
    round draws from ``rbeta(1 + wins, 1 + trials - wins)``, plays the arm
    with the largest draw, increments that arm's entry in ``trials`` and,
    if a Bernoulli draw with probability ``arm_probs[arm]`` comes up 1, its
    entry in ``wins``.

    ``exp_count``, ``wins``, ``trials`` and ``arm_probs`` are read from the
    enclosing scope; ``wins`` and ``trials`` are mutated in place.
    """
    for _ in range(1, exp_count):
        draws = pymc.rbeta(1 + wins, 1 + trials - wins)
        arm = np.argmax(draws)
        trials[arm] += 1
        # Simulate the pull: a single Bernoulli trial for the chosen arm.
        reward = np.random.binomial(n=1, p=arm_probs[arm])
        if reward == 1:
            wins[arm] += 1
示例#7
0
def run(mu1, mu2, mu3, N=10000):
    """Play ``N`` rounds over three ``TSBandit`` arms and return the running mean reward.

    Each round draws ``rbeta(1 + wins, 1 + lose)`` for every arm, pulls the
    arm with the largest draw and feeds the reward back into that arm via
    ``update``.

    Args:
        mu1, mu2, mu3: Passed to the ``TSBandit`` constructor, one per arm.
        N: Number of rounds.

    Returns:
        Array of length ``N`` whose i-th entry is the mean of the first
        ``i + 1`` rewards.
    """
    arms = [TSBandit(mu) for mu in (mu1, mu2, mu3)]
    rewards = np.empty(N)  # N uninitialised values; every slot is filled below
    for step in range(N):
        draws = [pymc.rbeta(1 + arm.wins, 1 + arm.lose) for arm in arms]
        best = np.argmax(draws)

        reward = arms[best].pull()
        arms[best].update(reward)
        rewards[step] = reward
    rounds_played = np.arange(N) + 1
    return np.cumsum(rewards) / rounds_played
示例#8
0
    def test(self, function_g = lambda x: x, function_b = lambda x: 1,seed = 2):
        '''Consume ``self.queue`` in batches whose size is chosen by Beta draws.

        While the queue is non-empty, one Beta sample is drawn per candidate
        batch size with parameters ``1 + log2(size) + Gs`` and ``1 + Bs``;
        the size with the largest sample is used to split that many items off
        the front of the queue. If all items are truthy the batch counts as
        good (``self.G`` grows by the batch size, ``Gs`` for that size grows
        by ``(index + 1) / 10``); otherwise it counts as bad (``self.B`` grows
        by one, ``self.g_test`` is run on the items, ``Bs`` for that size
        grows by ``1 / (1 + index)``).

        A trace of every decision is written to ``./test.txt``.
        ``./draws.txt`` is created/truncated but nothing is written to it.

        Args:
            function_g: Currently unused; kept for interface compatibility.
            function_b: Currently unused; kept for interface compatibility.
            seed: Currently unused; kept for interface compatibility.

        Returns:
            None. The list of chosen batch sizes is stored in
            ``self.choices`` and ``self.name`` is set.
        '''
        self.name = "Langsam Adaptive Bayesian"
        sizes = np.array([1,2,3,4,5,6,7,8,9,10,15,20,25,30])
        n_sizes = len(sizes)
        Gs = np.zeros(n_sizes)
        Bs = np.zeros(n_sizes)
        choice_history = []
        # Loop-invariant part of the first Beta parameter.
        log_sizes = np.log2(sizes)

        # ``with`` guarantees both files are closed even if the loop raises;
        # the original never actually called ``f2.close``.
        with open('./test.txt', 'w') as f, open('./draws.txt', 'w'):
            while self.queue.size > 0:
                draw = np.argmax(rbeta(1 + log_sizes + Gs, 1 + Bs))
                h_now = sizes[draw]
                choice_history.append(h_now)

                f.write ("{} ".format(h_now))
                # Update histories
                self.update(h_now)
                items, self.queue = np.split(self.queue,[h_now])
                if all(items):
                    self.G += items.size
                    Gs[draw] += (draw+1)/10
                    f.write ("SUCCESS ")
                else:
                    f.write ("_______ ")
                    self.B += 1
                    self.g_test(items)
                    Bs[draw] += 1 / (1 + draw)
                f.write ("({:.3f},{:.3f})\n".format(log_sizes[draw] + 1 + Gs[draw], 1 + Bs[draw]))
        self.choices = choice_history
    def sample_bandits(self, n=1):
        """Play ``n`` rounds, choosing each arm by the largest Beta draw.

        The draw is ``rbeta(self.wins, self.trials - self.wins)`` with no
        added offset. NOTE(review): both parameters must therefore already be
        positive -- confirm how ``wins`` and ``trials`` are initialised.

        Args:
            n: Number of rounds to play.

        Returns:
            None. ``self.wins``, ``self.trials`` and ``self.N`` are updated
            in place and the per-round rewards and choices are appended to
            ``self.bb_score`` and ``self.choices``.
        """
        rewards = np.zeros(n)
        picked = np.zeros(n)
        for step in range(n):
            losses = self.trials - self.wins
            arm = np.argmax(rbeta(self.wins, losses))
            outcome = self.bandits.pull(arm)

            # Fold the outcome back into the per-arm counters.
            self.wins[arm] += outcome
            self.trials[arm] += 1

            rewards[step] = outcome
            picked[step] = arm
            self.N += 1

        self.bb_score = np.r_[self.bb_score, rewards]
        self.choices = np.r_[self.choices, picked]
示例#10
0
 def choose(self):
     """Pick an action name by drawing one Beta sample per action.

     For every name in ``self.action_names`` the click and publish counts
     are read from ``self.action_record``; the Beta parameters are the
     click count and ``publish - click``. The sampled values are printed
     and the name with the largest sample is returned.

     NOTE(review): no offset is added to the counts, so an action with zero
     clicks or zero skips yields a zero Beta parameter -- confirm the
     records are initialised to positive values.
     """
     records = [self.action_record[name] for name in self.action_names]
     clicks = np.array([record[KEY_CLICK] for record in records])
     publishs = np.array([record[KEY_PUBLISH] for record in records])
     skips = publishs - clicks

     probs = np.array(
         [pm.rbeta(click, skip) for click, skip in zip(clicks, skips)])
     print(probs)
     return self.action_names[np.argmax(probs)]
示例#11
0
def plot_beta_binomial_funnel(alpha, beta):
    """Plot simulated beta-binomial rates against study size, with the observed data on top.

    10000 rates are drawn from ``Beta(alpha, beta)``, 10000 study sizes are
    drawn as ``exp`` of a ``mc.rnormal(10, 2**-2)`` sample, and a binomial
    count is drawn for each pair. The simulated ``count / size`` values are
    scattered against size, a vertical marker is drawn at
    ``alpha / (alpha + beta)`` and the module-level ``schiz`` data are
    overlaid on a log y-axis.

    Args:
        alpha: First Beta parameter.
        beta: Second Beta parameter.
    """
    n_draws = 10000
    pi_true = alpha/(alpha+beta)
    pi = mc.rbeta(alpha, beta, size=n_draws)

    n = pl.exp(mc.rnormal(10, 2**-2, size=n_draws))
    k = mc.rbinomial(pl.array(n, dtype=int), pi)
    r = k/n

    # The marker is drawn twice: a wide white line with a thinner dashed
    # black line on top of it.
    y_low, y_high = .1*n.min(), 10*n.max()
    marker_styles = (
        dict(linewidth=2, linestyle='-', color='w', zorder=9),
        dict(linewidth=1, linestyle='--', color='black', zorder=10),
    )
    for style in marker_styles:
        pl.vlines([pi_true], y_low, y_high, **style)

    pl.plot(r, n, 'ko', mew=0, alpha=.25)

    pl.semilogy(schiz['r'], schiz['n'], 'ks',
                mew=1, mec='white', ms=4, label='Observed values')

    pl.xlabel('Rate (per PY)')
    pl.ylabel('Study size (PY)')
    pl.xticks([0, .005, .01])
    pl.axis([-.0001, .0101, 50., 1500000])
    pl.title(r'$\alpha=%d$, $\beta=%d$' % (alpha, beta))
    def sample_bandits(self, n=1):
        """Play ``n`` rounds of Beta-draw arm selection and record the results.

        Each round draws ``rbeta(1 + wins, 1 + trials - wins)``, pulls the
        arm with the largest draw and updates that arm's counters.

        Args:
            n: Number of rounds to play.

        Returns:
            None. ``self.wins``, ``self.trials`` and ``self.N`` are updated
            in place; rewards and choices are appended to ``self.bb_score``
            and ``self.choices``.
        """
        rewards = np.zeros(n)
        picked = np.zeros(n)

        for step in range(n):
            # One draw per arm; play the arm whose draw is largest.
            alpha = 1 + self.wins
            beta = 1 + self.trials - self.wins
            arm = np.argmax(rbeta(alpha, beta))

            outcome = self.bandits.pull(arm)

            # Update the per-arm counters and the running totals.
            self.wins[arm] += outcome
            self.trials[arm] += 1
            rewards[step] = outcome
            self.N += 1
            picked[step] = arm

        self.bb_score = np.r_[self.bb_score, rewards]
        self.choices = np.r_[self.choices, picked]
 def sample_bandits(self, n=1):
     """Run ``n`` pulls, picking each arm from a Beta draw over all arms.

     Every pull samples ``rbeta(1 + wins, 1 + trials - wins)``, plays the
     arm with the largest sample and adds the result to that arm's
     ``wins`` and one to its ``trials``.

     Args:
         n: Number of pulls.

     Returns:
         None. Results are appended to ``self.bb_score`` and
         ``self.choices``; ``self.N`` grows by ``n``.
     """
     round_scores = np.zeros(n)
     round_choices = np.zeros(n)

     for idx in range(n):
         samples = rbeta(1 + self.wins, 1 + self.trials - self.wins)
         arm = np.argmax(samples)

         reward = self.bandits.pull(arm)

         # Record the outcome against the arm that was played.
         self.wins[arm] += reward
         self.trials[arm] += 1
         round_scores[idx] = reward
         self.N += 1
         round_choices[idx] = arm

     self.bb_score = np.r_[self.bb_score, round_scores]
     self.choices = np.r_[self.choices, round_choices]
     return None
示例#14
0
def generate_and_append_data(data, data_type, truth, age_intervals,
                             gbd_region='Asia, Southeast', country='Thailand', year=2005, sex='male'):
    """Simulate one data row per age interval and append it to ``data``.

    The row's ``'value'`` is obtained in three steps: the rate over the
    interval from ``dismod3.utils.rate_for_range`` (uniform weights), a Beta
    draw centred on it, and a Binomial draw divided by ``n``. The
    ``'standard_error'`` is ``sqrt(value * (1 - value) / n)``.

    ``dispersion`` and ``n`` are read from the enclosing scope.

    Args:
        data: List mutated in place; each new row's ``'id'`` is the list
            length at the time the row is built.
        data_type: Value for the ``'data_type'`` key.
        truth: Forwarded to ``dismod3.utils.rate_for_range``.
        age_intervals: Iterable of inclusive ``(start, end)`` age pairs.
        gbd_region: Value for the ``'gbd_region'`` key.
        country: Value for the ``'region'`` key.
        year: Value for both ``'year_start'`` and ``'year_end'``.
        sex: Value for the ``'sex'`` key.
    """
    for lo, hi in age_intervals:
        span = hi + 1 - lo
        ages = range(lo, hi + 1)

        rate = dismod3.utils.rate_for_range(truth, ages, np.ones(span))
        rate_draw = mc.rbeta(rate * dispersion, (1 - rate) * dispersion)
        value = mc.rbinomial(n, rate_draw) / n

        data.append({
            'condition': 'type_2_diabetes',
            'data_type': data_type,
            'gbd_region': gbd_region,
            'region': country,
            'year_start': year,
            'year_end': year,
            'sex': sex,
            'age_start': lo,
            'age_end': hi,
            'age_weights': list(np.ones(span)),
            'id': len(data),
            'value': value,
            'standard_error': np.sqrt(value * (1 - value) / n),
        })
示例#15
0
def plot_beta_binomial_funnel(alpha, beta):
    """Funnel plot of simulated beta-binomial rates, overlaid with the observed data.

    Simulates 10000 (rate, study size) pairs: rates from
    ``Beta(alpha, beta)``, sizes as ``exp`` of ``mc.rnormal(10, 2**-2)``
    draws, counts from a Binomial of the two. Plots ``count / size`` against
    size, marks ``alpha / (alpha + beta)`` with a vertical line and adds the
    module-level ``schiz`` observations on a log y-axis.

    Args:
        alpha: First Beta parameter.
        beta: Second Beta parameter.
    """
    sample_size = 10000
    pi_true = alpha / (alpha + beta)
    pi = mc.rbeta(alpha, beta, size=sample_size)

    n = pl.exp(mc.rnormal(10, 2**-2, size=sample_size))
    k = mc.rbinomial(pl.array(n, dtype=int), pi)
    r = k / n

    # White underlay first, dashed black line above it.
    line_bottom = .1 * n.min()
    line_top = 10 * n.max()
    pl.vlines([pi_true], line_bottom, line_top,
              linewidth=2, linestyle='-', color='w', zorder=9)
    pl.vlines([pi_true], line_bottom, line_top,
              linewidth=1, linestyle='--', color='black', zorder=10)
    pl.plot(r, n, 'ko', mew=0, alpha=.25)

    observed_style = dict(mew=1, mec='white', ms=4, label='Observed values')
    pl.semilogy(schiz['r'], schiz['n'], 'ks', **observed_style)

    pl.xlabel('Rate (per PY)')
    pl.ylabel('Study size (PY)')
    pl.xticks([0, .005, .01])
    pl.axis([-.0001, .0101, 50., 1500000])
    pl.title(r'$\alpha=%d$, $\beta=%d$' % (alpha, beta))
示例#16
0
 def f(sp_sub, x, a, b):
     """Evaluate ``g6pd.p_fem_def`` on the inverse-logit of ``sp_sub(x)``.

     Args:
         sp_sub: Callable evaluated at ``x``; its result is passed through
             ``pm.invlogit``.
         x: Argument for ``sp_sub``.
         a, b: Parameters of the Beta distribution from which one value per
             element of the transformed result is drawn.

     Returns:
         The result of ``g6pd.p_fem_def`` applied to the transformed values
         and the Beta draws.
     """
     p_vals = pm.invlogit(sp_sub(x))
     h_draws = pm.rbeta(a, b, size=len(p_vals))
     return g6pd.p_fem_def(p_vals, h_draws)
示例#17
0
 def f(sp_sub, a, b, n=n):
     """Draw binomial counts using the probability from ``g6pd.p_fem_def``.

     Args:
         sp_sub: Values passed through ``pm.invlogit``; one Beta draw is
             made per element.
         a, b: Parameters of the Beta distribution for those draws.
         n: Number of binomial trials; defaults to the ``n`` that was in
             scope when this function was defined.

     Returns:
         The result of ``pm.rbinomial(n=n, p=p_def)``.
     """
     p = pm.invlogit(sp_sub)
     h = pm.rbeta(a, b, size=len(sp_sub))
     p_def = g6pd.p_fem_def(p, h)
     # The original computed p_def and then sampled with p, leaving the Beta
     # draw and p_def without any effect on the result.
     return pm.rbinomial(n=n, p=p_def)
示例#18
0
 def policy(self, experiment, k):
     """Return the index of the largest Beta draw over the experiment's arms.

     Args:
         experiment: Object providing ``success`` and ``failure`` counts,
             which are offset by ``self.alpha`` and ``self.beta``.
         k: Not used by this policy.
     """
     posterior_alpha = experiment.success + self.alpha
     posterior_beta = experiment.failure + self.beta
     draws = rbeta(posterior_alpha, posterior_beta)
     return np.argmax(draws)
示例#19
0
文件: bandit.py 项目: nclv/Python-3.5
def bayesian_bandit_choice(self):
    """Return the index of the largest ``rbeta(1 + wins, 1 + trials - wins)`` draw."""
    alpha = 1 + self.wins
    beta = 1 + self.trials - self.wins
    draws = rbeta(alpha, beta)
    return np.argmax(draws)
示例#20
0
def Thompson_sampling(estimated_beta_params):
    """Return the index of the row with the largest Beta draw.

    Args:
        estimated_beta_params: 2-D array; column 0 is used as the success
            count and the row sum as the total count of each row.

    Returns:
        ``argmax`` of ``rbeta(1 + successes, 1 + totals - successes)``.
    """
    totals = estimated_beta_params.sum(1)  # row sums
    successes = estimated_beta_params[:, 0]
    alpha = 1 + successes
    beta = 1 + totals - successes
    return np.argmax(pymc.rbeta(alpha, beta))
示例#21
0
def thompson_sample_desc(estimated_beta_params):
    """Return the index of the row with the *smallest* Beta draw.

    Args:
        estimated_beta_params: 2-D array; column 0 is used as the success
            count and the row sum as the total count of each row.

    Returns:
        ``argmin`` of ``rbeta(1 + successes, 1 + totals - successes)``.
    """
    wins = estimated_beta_params[:, 0]
    pulls = estimated_beta_params.sum(1)
    draws = pymc.rbeta(1 + wins, 1 + pulls - wins)
    return numpy.argmin(draws)
示例#22
0
def pred(alpha=alpha, beta=beta, phi=phi):
    """Return 0 when ``pl.rand() < phi``, otherwise a simulated beta-binomial rate.

    The rate is a ``mc.rbinomial`` draw with ``n_pred`` trials and a
    Beta(alpha, beta) success probability, divided by ``n_pred``.
    The defaults are whatever ``alpha``, ``beta`` and ``phi`` were in scope
    when the function was defined; ``n_pred`` is read at call time.
    """
    if pl.rand() < phi:
        return 0
    success_prob = mc.rbeta(alpha, beta)
    count = mc.rbinomial(n_pred, success_prob)
    return count / float(n_pred)
示例#23
0
import numpy as np
import pymc


"""
Python implementation of the Thompson sampling algorithm
"""

# Example counts for a single arm: 10 successes out of 100 trials.
successes = 10
totals = 100
# Draw from Beta(1 + successes, 1 + failures) and take the argmax of the draw.
# The result is not stored or printed.
# NOTE(review): with scalar counts the draw is presumably a single value, so
# argmax would always be 0 -- confirm whether arrays were intended.
np.argmax(pymc.rbeta(1 + successes, 1 + totals - successes))

示例#24
0
def fem_def(sp_sub, a, b):
    """Combine ``male_def`` and a Beta-scaled ``hw_hetero`` for ``sp_sub``.

    Args:
        sp_sub: Input forwarded to ``male_def`` and ``hw_hetero``.
        a, b: Parameters of the Beta distribution from which the scaling
            factor for the ``hw_hetero`` term is drawn.

    Returns:
        ``hw_hetero(sp_sub) * rbeta(a, b) + male_def(sp_sub)``.
    """
    # The identifier had been mangled to ``h**o``, which is not a valid
    # assignment target and made the whole function a syntax error.
    homo = male_def(sp_sub)
    hetero = hw_hetero(sp_sub)
    het_def = pm.rbeta(a, b)
    hetero *= het_def
    return hetero + homo
示例#25
0
 def f(sp_sub, a, b, n=n):
     """Draw binomial counts using the probability from ``g6pd.p_fem_def``.

     Args:
         sp_sub: Values passed through ``pm.invlogit``; one Beta draw is
             made per element.
         a, b: Parameters of the Beta distribution for those draws.
         n: Number of binomial trials; defaults to the ``n`` that was in
             scope when this function was defined.

     Returns:
         The result of ``pm.rbinomial(n=n, p=p_def)``.
     """
     p = pm.invlogit(sp_sub)
     h = pm.rbeta(a, b, size=len(sp_sub))
     p_def = g6pd.p_fem_def(p, h)
     # The original computed p_def and then sampled with p, leaving the Beta
     # draw and p_def without any effect on the result.
     return pm.rbinomial(n=n, p=p_def)
示例#26
0
def pred(alpha=alpha, beta=beta, phi=phi):
    """Simulate one rate: 0 if ``pl.rand() < phi``, else a beta-binomial fraction.

    In the non-zero case a success probability is drawn from
    Beta(alpha, beta), a count is drawn with ``mc.rbinomial`` using
    ``n_pred`` trials, and the count divided by ``n_pred`` is returned.
    Defaults are bound from the enclosing scope at definition time;
    ``n_pred`` is looked up when the function is called.
    """
    is_zero = pl.rand() < phi
    if is_zero:
        return 0
    trials = float(n_pred)
    return mc.rbinomial(n_pred, mc.rbeta(alpha, beta)) / trials
示例#27
0
def pred(alpha=alpha, beta=beta):
    """Return one simulated beta-binomial fraction.

    A success probability is drawn from Beta(alpha, beta), a count is drawn
    with ``mc.rbinomial`` using ``n_pred`` trials, and the count divided by
    ``n_pred`` is returned. Defaults are bound from the enclosing scope at
    definition time; ``n_pred`` is looked up at call time.
    """
    success_prob = mc.rbeta(alpha, beta)
    count = mc.rbinomial(n_pred, success_prob)
    return count / float(n_pred)
def bayesian_bandit_choice(self):
    """Sample every arm from ``rbeta(1 + wins, 1 + trials - wins)`` and return the argmax."""
    wins_plus_one = 1 + self.wins
    losses_plus_one = 1 + self.trials - self.wins
    return np.argmax(rbeta(wins_plus_one, losses_plus_one))
示例#29
0
def bayesian_bandit(self):
    """Return the index of the largest Beta draw across arms.

    The first Beta parameter is ``1 + wins``; the second is
    ``1 + trials - wins``, i.e. one plus the number of losses.
    """
    losses_term = 1 + self.trials - self.wins
    draws = rbeta(1 + self.wins, losses_term)
    return np.argmax(draws)
示例#30
0
 def f(sp_sub, x, a, b):
     """Apply ``g6pd.p_fem_def`` to the inverse-logit of ``sp_sub(x)`` and fresh Beta draws.

     Args:
         sp_sub: Callable evaluated at ``x``.
         x: Argument for ``sp_sub``.
         a, b: Beta parameters; one draw is made per element of the
             inverse-logit result.
     """
     transformed = pm.invlogit(sp_sub(x))
     n_points = len(transformed)
     beta_draws = pm.rbeta(a, b, size=n_points)
     return g6pd.p_fem_def(transformed, beta_draws)
示例#31
0
import numpy as np
import pymc
# wins and trials are N-dimensional vectors, where N is the number of bandit
# arms; each element holds the count for one arm.
choice = np.argmax(pymc.rbeta(1 + wins, 1 + trials - wins))
# NOTE(review): the win counter is incremented unconditionally, so this
# snippet presumably shows the update after a successful pull -- confirm.
wins[choice] += 1
# Only the arm that was actually pulled gains a trial; the original
# ``trials += 1`` incremented the trial count of every arm.
trials[choice] += 1
示例#32
0
def fem_def(sp_sub, a, b):
    """Return ``male_def(sp_sub)`` plus ``hw_hetero(sp_sub)`` scaled by a Beta draw.

    Args:
        sp_sub: Input forwarded to ``male_def`` and ``hw_hetero``.
        a, b: Parameters of the Beta distribution from which the scaling
            factor for the ``hw_hetero`` term is drawn.

    Returns:
        ``hw_hetero(sp_sub) * rbeta(a, b) + male_def(sp_sub)``.
    """
    # The identifier had been mangled to ``h**o``, which is not a valid
    # assignment target and made the whole function a syntax error.
    homo = male_def(sp_sub)
    hetero = hw_hetero(sp_sub)
    het_def = pm.rbeta(a, b)
    hetero *= het_def
    return hetero + homo
    def DoSamplingIteration(self, y):
        """Run one sampler iteration, drawing w, a, x and the noise variance in turn.

        Reads the latest entries of ``self.xSeq`` and ``self.varianceSeq``,
        then appends one new entry to each of ``self.hyperparameterSeq``,
        ``self.varianceSeq`` and ``self.xSeq``, replaces ``self.hx`` and
        reports the new state to ``self.iterObserver``.

        Args:
            y: Observation array; ``y.size`` and ``y - hxNext[:, 0]`` are
                evaluated on it.

        Raises:
            NameError: If the previous x or variance sample cannot be read.
            ZeroDivisionError, OverflowError: If drawing ``aSample`` fails on
                every retry attempt.
            AssertionError: If a sanity check on the drawn values or on the
                sequence lengths fails.
        """
        # Setup
        try:
            xLast = self.xSeq[-1]
            varLast = self.varianceSeq[-1]
        except Exception:
            raise NameError('Cannot access xLast and/or varLast')

        # n0 = #{ i : xLast[i] == 0} whereas n1 = ||xLast||_0
        n0, n1 = NumericalHelper.CalculateNumZerosNonzeros(xLast, self.Eps)

        # Sample to get w|xLast and a|xLast,alpha
        wSample = pymc.rbeta(1 + n1, 1 + n0)
        assert (wSample >= 0) and (wSample <= 1), __file__ + ': wSample is out of bounds'
        logging.info("  Samp. Iter. {0}, generating wSample ~ Beta({1},{2}) ... {3:.5f}".format(self._samplerIter, 1 + n1, 1 + n0, wSample))

        igShapeForA = n1 + self.hyperparameterPriorDict['alpha0']
        xLastL1Norm = np.sum(np.abs(xLast))
        igScaleForA = xLastL1Norm + self.hyperparameterPriorDict['alpha1']

        assert (igShapeForA > 0) and (igScaleForA > 0), "IG shape and scale aren't strictly positive"

        # Retry the draw a few times. The original loop counted 5..1 but
        # tested for 0, so the failure was never re-raised and aSample stayed
        # None, which only surfaced later as a formatting error.
        maxTries = 5
        aSample = None
        for tryInd in range(maxTries):
            try:
                aSample = pymc.rinverse_gamma(igShapeForA, igScaleForA)
                break
            except (ZeroDivisionError, OverflowError) as e:
                # Format the exception itself: ``e.message`` does not exist
                # on Python 3 exceptions.
                logging.error("Couldn't generate aSample ~ IG({0},{1}) using n0={2}, n1={3}: {4}".format(igShapeForA, igScaleForA, n0, n1, e))
                # Only raise the exception if repeated attempts failed
                if tryInd == maxTries - 1:
                    raise

        logging.info("  Samp. Iter. {0}, generating aSample ~ IG({1:.4f},{2:.4f}) ... {3:.4e}".format(self._samplerIter, igShapeForA, igScaleForA, aSample))

        self.hyperparameterSeq.append({'w' : wSample, 'a' : aSample})

        # Sample to get x_i, 1 <= i <= M. The method DoSamplingXConditionedAll updates self.xSeq and self._mappedX
        xNext, hxNext = self.DoSamplingXConditionedAll(y, wSample, aSample, varLast, xLast, self.hx, self.nVerbose > 0)

        # Sample to get variance. Divide by 2.0 so the shape is not truncated
        # to an integer under Python 2 division when y.size is odd.
        yErr = y - hxNext[:, 0]
        igShapeForVariance = y.size / 2.0
        igScaleForVariance = np.sum(yErr * yErr) / 2.0
        varianceSample = pymc.rinverse_gamma(igShapeForVariance, igScaleForVariance)
        logging.info("  Samp. Iter. {0}, generating varianceSample ~ IG({1:.4f},{2:.4f}) ... {3:.4e}".format(
            self._samplerIter,
            igShapeForVariance,
            igScaleForVariance,
            varianceSample
        ))
        self.varianceSeq.append(varianceSample)

        self.iterObserver.UpdateState({
            McmcIterationEvaluator.STATE_KEY_COUNT_ITER: self._samplerIter,
            McmcIterationEvaluator.STATE_KEY_X_ITER: xNext,
            McmcIterationEvaluator.STATE_KEY_HX_ITER: hxNext,
            McmcIterationEvaluator.STATE_KEY_W_ITER: wSample,
            McmcIterationEvaluator.STATE_KEY_A_ITER: aSample,
            McmcIterationEvaluator.STATE_KEY_NOISEVAR_ITER: varianceSample
        })

        self.xSeq.append(xNext)
        assert len(self.xSeq) == len(self.varianceSeq), __file__ + ': DoSamplingIteration: seq length mismatch #1'
        assert len(self.xSeq) == (len(self.hyperparameterSeq) + 1), __file__ + ': DoSamplingIteration: seq length mismatch #2'

        self.hx = hxNext
示例#34
0
@contact: [email protected]
@file: beta.py
@time: 2018/1/22 7:27 PM
@desc: plot of the beta distribution

"""

import sys

import pymc
from scipy.stats import beta
import matplotlib.pyplot as plt
import numpy as np
# Python 2 idiom: reload sys to regain access to setdefaultencoding, then
# switch the process-wide default string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

# Shape parameters used for every call below.
a, b = 32, 199

# Three independent draws from Beta(1 + a, b) ...
print pymc.rbeta(1 + a, b)
print pymc.rbeta(1 + a, b)
print pymc.rbeta(1 + a, b)
# ... and three from Beta(1 + a, b + 1).
print pymc.rbeta(1 + a, b + 1)
print pymc.rbeta(1 + a, b + 1)
print pymc.rbeta(1 + a, b + 1)

# Mean, variance, skewness and kurtosis of Beta(a, b); computed but not used
# anywhere below.
mean, var, skew, kurt = beta.stats(a, b, moments='mvsk')
# Density of Beta(a, b) at 0.1.
print beta.pdf(0.1, a, b)
# Plot the Beta(a, b) density on 100 evenly spaced points in [0, 1].
x = np.linspace(0, 1, 100)
plt.plot(x, beta.pdf(x, a, b), 'r-', lw=5, alpha=0.6, label='beta pdf')
plt.show()