Exemplo n.º 1
0
    def simulate(self, seed, scheme='TS_HGLM'):
        """Simulate one ad-allocation scheme and return its mean conversions.

        ``self.Budget`` impressions are split by integer division over
        ``self.simulation_time // self.period_length`` periods and then over
        the ``self.J`` websites.  Showing arm ``k`` on website ``j`` converts
        with probability ``self.mu_true[j, k]``.  A log is written to
        ``result/<scheme>_seed_<seed>.txt`` (the directory is not created
        here): one line per period with that period's conversions and the
        running average, preceded for 'TS_HGLM' by the current coefficient
        mean.

        Args:
            seed: Tag used in the log file name.  Only 'TS_HGLM' also passes
                it to ``np.random.seed``; every other scheme draws from
                whatever state the global NumPy RNG is already in.
            scheme: 'TS_HGLM', 'Balanced', 'Perfect',
                'Test_rollout_Pooled' / 'Test_rollout_Unpooled',
                'Greedy_Pooled' / 'Greedy_Unpooled',
                'Epsilon_Greedy_Pooled_10' / 'Epsilon_Greedy_Unpooled_10',
                'UCB1_Pooled' / 'UCB1_Unpooled',
                'UCB1_Tuned_Pooled' / 'UCB1_Tuned_Unpooled',
                'Gittins_Pooled' / 'Gittins_Unpooled' (not implemented).
                "Pooled" variants keep one statistic per arm shared by all
                websites; "Unpooled" variants keep one per (website, arm).

        Returns:
            The average number of conversions per period; ``0`` for the
            unimplemented Gittins schemes; ``None`` for an unknown scheme.
        """
        # scheme name -> (pooled across websites?, index rule, epsilon)
        index_policies = {
            'Greedy_Pooled': (True, 'greedy', 0.0),
            'Greedy_Unpooled': (False, 'greedy', 0.0),
            'Epsilon_Greedy_Pooled_10': (True, 'greedy', 0.1),
            'Epsilon_Greedy_Unpooled_10': (False, 'greedy', 0.1),
            'UCB1_Pooled': (True, 'ucb1', 0.0),
            'UCB1_Unpooled': (False, 'ucb1', 0.0),
            'UCB1_Tuned_Pooled': (True, 'ucb1_tuned', 0.0),
            'UCB1_Tuned_Unpooled': (False, 'ucb1_tuned', 0.0),
        }
        # scheme name -> pooled across websites?
        rollouts = {
            'Test_rollout_Pooled': True,
            'Test_rollout_Unpooled': False,
        }

        # The context manager closes the log even if a scheme raises.
        with open('result/' + scheme + '_seed_' + str(seed) + '.txt',
                  'w') as output:
            if scheme in ('Gittins_Pooled', 'Gittins_Unpooled'):  # TODO
                return 0
            if (scheme not in ('TS_HGLM', 'Balanced', 'Perfect')
                    and scheme not in rollouts
                    and scheme not in index_policies):
                return None  # unknown scheme: only the empty log is created

            # `//` keeps these integers on Python 3 as well as Python 2; they
            # are used as range() bounds and array sizes below.
            num_period = self.simulation_time // self.period_length
            budget_for_each_period = self.Budget // num_period
            budget_for_each_website = budget_for_each_period // self.J
            conversion = np.empty([num_period])  # conversions per period

            def emit(*values):
                # Writes the same text `print >> output, a, b` did (str() of
                # each value, space separated) but also runs on Python 3.
                output.write(' '.join([str(v) for v in values]) + '\n')

            def impression(site, arm):
                # One Bernoulli draw: 1 if the shown ad converts, else 0.
                prob = self.mu_true[site, arm]
                return 1 if np.random.random() < prob else 0

            def close_period(period, counter):
                # Store the period total and log it with the running mean.
                conversion[period] = counter
                emit(counter, np.average(conversion[0:period + 1]))

            def play_fixed(arm_for_site):
                # Spend one period showing every website its assigned arm.
                counter = 0
                for j in range(self.J):
                    for _ in range(budget_for_each_website):
                        counter += impression(j, arm_for_site[j])
                return counter

            def index_scores(rule, period, wins, plays, log_scale):
                # Per-arm index whose argmax is played next.  The 0.00001
                # terms give unplayed arms an empirical rate of 1 so that
                # every arm gets tried.
                mean = (wins + 0.00001) / (plays + 0.00001)
                if rule == 'greedy' or period == 0:
                    # The UCB variants are plain greedy in the first period.
                    return mean
                log_t = np.log(period * log_scale)
                # NOTE(review): the confidence radius divides by
                # conversions + 1 (`wins + 1`), as the original code did;
                # textbook UCB1 divides by the number of plays -- confirm
                # which one is intended before changing results.
                if rule == 'ucb1':
                    return mean + np.sqrt(2 * log_t / (wins + 1))
                # UCB1-Tuned: radius scaled by min(1/4, variance bound).
                # `wins / plays` is nan for arms with no impressions yet
                # (NumPy emits a RuntimeWarning), as in the original.
                rate = wins / plays
                v_kt = rate * (1 - rate) + np.sqrt(2 * log_t / (wins + 1))
                capped = np.minimum(v_kt, np.zeros_like(v_kt) + 0.25)
                return mean + np.sqrt(capped * log_t / (wins + 1))

            def run_index_policy(pooled, rule, epsilon):
                # Greedy / epsilon-greedy / UCB1 / UCB1-Tuned main loop.
                shape = self.K if pooled else [self.J, self.K]
                wins = np.zeros(shape)  # number of conversions
                plays = np.zeros(shape)  # number of impressions
                if pooled:
                    log_scale = budget_for_each_period
                else:
                    log_scale = budget_for_each_website
                for period in range(num_period):
                    counter = 0
                    for j in range(self.J):
                        # Row views: updates land in the full arrays.
                        site_wins = wins if pooled else wins[j]
                        site_plays = plays if pooled else plays[j]
                        for _ in range(budget_for_each_website):
                            arm = np.argmax(
                                index_scores(rule, period, site_wins,
                                             site_plays, log_scale))
                            if epsilon > 0 and np.random.random() < epsilon:
                                arm = np.random.randint(0, self.K)
                            hit = impression(j, arm)
                            site_wins[arm] += hit
                            site_plays[arm] += 1
                            counter += hit
                    close_period(period, counter)

            def run_rollout(pooled):
                # Explore every arm equally for the first fifth of the
                # periods, then commit to the arm with most conversions.
                budget_for_each_arm = budget_for_each_website // self.K
                tau = num_period // 5  # length of exploration
                # Must start at zero: it is only ever incremented.  (The
                # original used np.empty, i.e. uninitialised memory.)
                accumulated = np.zeros(
                    self.K if pooled else [self.J, self.K])
                for period in range(tau):
                    counter = 0
                    for j in range(self.J):
                        tally = accumulated if pooled else accumulated[j]
                        for k in range(self.K):
                            for _ in range(budget_for_each_arm):
                                hit = impression(j, k)
                                counter += hit
                                tally[k] += hit
                    close_period(period, counter)

                if pooled:
                    best_arm = [np.argmax(accumulated)] * self.J
                else:
                    best_arm = np.argmax(accumulated, axis=1)
                for period in range(tau, num_period):
                    close_period(period, play_fixed(best_arm))

            if scheme == 'TS_HGLM':
                # Thompson sampling with a hierarchical generalized linear
                # model (partial pooling).
                np.random.seed(seed)
                prior_beta_bar = np.random.normal(-1, 1, self.d)
                tmp = np.random.rand(Scheme.d, Scheme.d)
                prior_Sigma = np.dot(tmp, tmp.T)
                emit(prior_beta_bar)
                output.flush()

                # Training data, one row per impression shown so far.  Rows
                # are appended contiguously so the fit below never sees
                # uninitialised rows when the period budget is not a
                # multiple of self.J.
                X = np.empty([self.Budget, self.d])
                y = np.empty([self.Budget])
                n_obs = 0

                for period in range(num_period):
                    counter = 0
                    for j in range(self.J):
                        for _ in range(budget_for_each_website):
                            # NOTE(review): a fresh beta_j is drawn for
                            # every arm (kept from the original); standard
                            # Thompson sampling scores all arms under one
                            # draw -- confirm intent.
                            beta_j = np.random.multivariate_normal(
                                prior_beta_bar, prior_Sigma)
                            to_select = 0
                            best_value = 1 / (
                                1 + np.exp(-np.dot(self.x[0], beta_j.T)))
                            for i in range(1, self.K):
                                beta_j = np.random.multivariate_normal(
                                    prior_beta_bar, prior_Sigma)
                                new_value = 1 / (
                                    1 + np.exp(-np.dot(self.x[i], beta_j)))
                                if new_value > best_value:
                                    to_select, best_value = i, new_value
                            X[n_obs] = self.x[to_select]
                            hit = impression(j, to_select)
                            y[n_obs] = hit
                            counter += hit
                            n_obs += 1
                    # NOTE(review): the posterior becomes the next prior
                    # while the fit still uses all data gathered so far
                    # (kept from the original) -- confirm this is intended.
                    prior_beta_bar, prior_Sigma = bl.fit_bayes_logistic(
                        y[0:n_obs], X[0:n_obs], prior_beta_bar, prior_Sigma)
                    conversion[period] = counter
                    emit(prior_beta_bar, counter,
                         np.average(conversion[0:period + 1]))
                    output.flush()
            elif scheme == 'Balanced':
                # Balanced allocation: every arm gets the same share.
                budget_for_each_arm = budget_for_each_website // self.K
                for period in range(num_period):
                    counter = 0
                    for j in range(self.J):
                        for k in range(self.K):
                            for _ in range(budget_for_each_arm):
                                counter += impression(j, k)
                    close_period(period, counter)
            elif scheme == 'Perfect':
                # Perfect information: always show each website's true best.
                best_arm = np.argmax(self.mu_true, axis=1)
                for period in range(num_period):
                    close_period(period, play_fixed(best_arm))
            elif scheme in rollouts:
                run_rollout(rollouts[scheme])
            else:
                run_index_policy(*index_policies[scheme])

            return np.average(conversion)
    N1,
])  # bias term
# Fold columns 1..5 into columns 6..10 (element-wise product); this is the
# step that imposes the correlation between features.
for offset in range(5):
    src_col = offset + 1
    dst_col = offset + 6
    X1[:, dst_col] = X1[:, src_col] * X1[:, dst_col]

# ------------------------------------------------------------------------------
# Draw the parameter vector; entry 0 is the bias parameter.
w_true = np.random.uniform(-0.5, 0.5, p)
w_true[0] = -1

# ------------------------------------------------------------------------------
# Generate binary responses: one Bernoulli draw per row, using the logistic
# probability of that row under w_true.
mu = bl.logistic_prob(X1, w_true)
y1 = np.empty([N1])
for row in np.arange(N1):
    y1[row] = np.random.binomial(1, mu[row])

# Starting point for the fit: an all-zero prior parameter vector and a
# diagonal Hessian.
w_prior = np.zeros(p)
H_prior = 0.001 * np.diag(np.ones(p))

#----------------------------------------------------------------------------------------
# Bayesian fit on this random sample.
# By default this uses a full Hessian matrix and a Newton conjugate-gradient
# solver.
w_posterior, H_posterior = bl.fit_bayes_logistic(y1, X1, w_prior, H_prior)
logistic_prob = bl.logistic_prob(X1, w_posterior)

# Compare fitted probabilities with the true ones and with the sampled labels.
print(r2_score(mu, logistic_prob))
print(roc_auc_score(y1, logistic_prob))
Exemplo n.º 3
0
    def simulate(self,seed,scheme = 'TS_HGLM'):
        output = open('result/'+scheme+'_seed_'+str(seed)+'.txt','w')
        if scheme == 'TS_HGLM':#Thompson sampling with a hierarchical generalized linear model (partial pooling)
            np.random.seed(seed)
            prior_beta_bar = np.random.normal(-1,1,self.d)
            tmp = np.random.rand(Scheme.d,Scheme.d)
            prior_Sigma = np.dot(tmp,tmp.T)
            print>>output, prior_beta_bar
            output.flush()


            num_period = self.simulation_time/self.period_length
            budget_for_each_period = self.Budget/num_period
            budget_for_each_website = budget_for_each_period/self.J
            conversion = np.empty([num_period])#used to store conversion in each period

            X = np.empty([self.Budget,self.d])#X and y are used to store training data
            y = np.empty([self.Budget])

            for period in range(num_period):
                counter = 0

                #display ad
                for j in range(self.J):#for each website
                    for b in range(budget_for_each_website):
                        beta_j = np.random.multivariate_normal(prior_beta_bar,prior_Sigma)#sample a bete_j
                        to_select = 0
                        max = 1/(1+np.exp(-np.dot(self.x[0],beta_j.T)))
                        for i in range(1,self.K):#choose the best arm under sampled beta_j
                            beta_j = np.random.multivariate_normal(prior_beta_bar,prior_Sigma)#sample a bete_j
                            new_value = 1/(1+np.exp(-np.dot(self.x[i],beta_j)))
                            if  new_value > max:
                                to_select, max = i, new_value
                        prob = self.mu_true[j,to_select]
                        X[period*budget_for_each_period + j*budget_for_each_website+b] = self.x[to_select]#all training data is recorded
                        if np.random.random()<prob:
                            y[period*budget_for_each_period + j*budget_for_each_website+b] = 1
                            counter += 1
                        else:
                            y[period*budget_for_each_period + j*budget_for_each_website+b] = 0
                prior_beta_bar, prior_Sigma = bl.fit_bayes_logistic(y[0:(period+1)*budget_for_each_period], X[0:(period+1)*budget_for_each_period], prior_beta_bar, prior_Sigma)
                conversion[period] = counter
                print>>output, prior_beta_bar,counter,np.average(conversion[0:period+1])
                output.flush()
            return np.average(conversion)
        elif scheme == 'Balanced':#balanced allocation
            num_period = self.simulation_time/self.period_length
            budget_for_each_period = self.Budget/num_period
            budget_for_each_website = budget_for_each_period/self.J
            conversion = np.empty([num_period])

            budget_for_each_arm = budget_for_each_website/self.K
            for period in range(num_period):
                counter = 0

                #display ad
                for j in range(self.J):#for each website
                    for k in range(self.K):
                        for ii in range(budget_for_each_arm):
                            prob = self.mu_true[j,k]
                            counter += 1 if np.random.random()<prob else 0
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            return np.average(conversion)

        elif scheme == 'Perfect':#scheme with perfect information
            best_arm = np.argmax(self.mu_true,axis = 1)
            num_period = self.simulation_time/self.period_length
            budget_for_each_period = self.Budget/num_period
            budget_for_each_website = budget_for_each_period/self.J
            conversion = np.empty([num_period])
            for period in range(num_period):
                counter = 0

                #display ad
                for j in range(self.J):#for each website
                        for ii in range(budget_for_each_website):
                            prob = self.mu_true[j,best_arm[j]]
                            counter += 1 if np.random.random()<prob else 0
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            return np.average(conversion)
        elif scheme == 'Test_rollout_Unpooled':
            num_period = self.simulation_time/self.period_length
            budget_for_each_period = self.Budget/num_period
            budget_for_each_website = budget_for_each_period/self.J
            budget_for_each_arm = budget_for_each_website/self.K

            conversion = np.empty([num_period])
            tau = num_period/5#length of exploration periods

            accumulated_conversion = np.empty([self.J,self.K])
            for period in range(tau):
                counter = 0

                #display ad
                for j in range(self.J):#for each website
                    for k in range(self.K):#for each arm
                        for ii in range(budget_for_each_arm):
                            prob = self.mu_true[j,k]
                            if np.random.random()<prob:
                                counter += 1
                                accumulated_conversion[j,k] += 1
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])

            best_arm = np.argmax(accumulated_conversion,axis = 1)
            for period in range(tau,num_period):
                counter = 0

                #display ad
                for j in range(self.J):#for each website
                        for ii in range(budget_for_each_website):
                            prob = self.mu_true[j,best_arm[j]]
                            counter += 1 if np.random.random()<prob else 0
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            return np.average(conversion)
        elif scheme == 'Test_rollout_Pooled':
            num_period = self.simulation_time/self.period_length
            budget_for_each_period = self.Budget/num_period
            budget_for_each_website = budget_for_each_period/self.J
            budget_for_each_arm = budget_for_each_website/self.K

            conversion = np.empty([num_period])
            tau = num_period/5#length of exploration

            accumulated_conversion = np.empty([self.K])
            for period in range(tau):
                counter = 0

                #display ad
                for j in range(self.J):#for each website
                    for k in range(self.K):#for each arm
                        for ii in range(budget_for_each_arm):
                            prob = self.mu_true[j,k]
                            if np.random.random()<prob:
                                counter += 1
                                accumulated_conversion[k] += 1
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])

            best_arm = np.argmax(accumulated_conversion)
            for period in range(tau,num_period):
                counter = 0

                #display ad
                for j in range(self.J):#for each website
                        for ii in range(budget_for_each_website):
                            prob = self.mu_true[j,best_arm]
                            counter += 1 if np.random.random()<prob else 0
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            return np.average(conversion)
        elif scheme == 'Greedy_Pooled':
            conversion_numerator = np.zeros(self.K)# number of conversions, the same in the following code
            conversion_denominator = np.zeros(self.K)#number of impressions, the same in the following code

            num_period = self.simulation_time/self.period_length
            budget_for_each_period = self.Budget/num_period
            budget_for_each_website = budget_for_each_period/self.J
            conversion = np.empty([num_period])

            for period in range(num_period):
                counter = 0
                #display ad
                for j in range(self.J):#for each website
                    for ii in range(budget_for_each_website):
                        # 0.00001 is added in denominator to avoid the problem of deviding by zero
                        # 0.00001 is also added in numerator such that the original reward is 1, which guarantees that every arm will be explored
                        # the same in the following code
                        best_arm = np.argmax((conversion_numerator+0.00001)/(conversion_denominator+0.00001))
                        prob = self.mu_true[j,best_arm]
                        if np.random.random()<prob:
                            conversion_numerator[best_arm] +=1
                            counter += 1
                        conversion_denominator[best_arm] +=1
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            return np.average(conversion)
        elif scheme == 'Greedy_Unpooled':
            conversion_numerator = np.zeros([self.J,self.K])
            conversion_denominator = np.zeros([self.J, self.K])

            num_period = self.simulation_time/self.period_length
            budget_for_each_period = self.Budget/num_period
            budget_for_each_website = budget_for_each_period/self.J
            conversion = np.empty([num_period])

            for period in range(num_period):
                counter = 0
                #display ad
                for j in range(self.J):#for each website
                    for ii in range(budget_for_each_website):
                        best_arm = np.argmax((conversion_numerator[j]+0.00001)/(conversion_denominator[j]+0.00001))
                        prob = self.mu_true[j,best_arm]
                        if np.random.random()<prob:
                            conversion_numerator[j,best_arm] +=1
                            counter += 1
                        conversion_denominator[j,best_arm] +=1
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            return np.average(conversion)
        elif scheme == 'Epsilon_Greedy_Pooled_10':
            conversion_numerator = np.zeros(self.K)
            conversion_denominator = np.zeros(self.K)
            conversion_rate = np.zeros(self.K)

            num_period = self.simulation_time/self.period_length
            budget_for_each_period = self.Budget/num_period
            budget_for_each_website = budget_for_each_period/self.J
            conversion = np.empty([num_period])

            for period in range(num_period):
                counter = 0
                #display ad
                for j in range(self.J):#for each website
                    for ii in range(budget_for_each_website):
                        best_arm = np.argmax((conversion_numerator+0.00001)/(conversion_denominator+0.00001))
                        if np.random.random()<0.1:
                            best_arm = np.random.randint(0,self.K)
                        prob = self.mu_true[j,best_arm]
                        if np.random.random()<prob:
                            conversion_numerator[best_arm] +=1
                            counter += 1
                        conversion_denominator[best_arm] +=1
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            return np.average(conversion)
        elif scheme == 'Epsilon_Greedy_Unpooled_10':
            conversion_numerator = np.zeros([self.J,self.K])
            conversion_denominator = np.zeros([self.J, self.K])

            num_period = self.simulation_time/self.period_length
            budget_for_each_period = self.Budget/num_period
            budget_for_each_website = budget_for_each_period/self.J
            conversion = np.empty([num_period])

            for period in range(num_period):
                counter = 0
                #display ad
                for j in range(self.J):#for each website
                    for ii in range(budget_for_each_website):
                        best_arm = np.argmax((conversion_numerator[j]+0.00001)/(conversion_denominator[j]+0.00001))
                        if np.random.random()<0.1:
                            best_arm = np.random.randint(0,self.K)
                        prob = self.mu_true[j,best_arm]
                        if np.random.random()<prob:
                            conversion_numerator[j,best_arm] +=1
                            counter += 1
                        conversion_denominator[j,best_arm] +=1
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            return np.average(conversion)
        elif scheme == 'UCB1_Pooled':
            conversion_numerator = np.zeros(self.K)#number of conversion
            conversion_denominator = np.zeros(self.K)#number of impression

            num_period = self.simulation_time/self.period_length
            budget_for_each_period = self.Budget/num_period
            budget_for_each_website = budget_for_each_period/self.J
            conversion = np.empty([num_period])

            for period in range(1):
                counter = 0
                #display ad
                for j in range(self.J):#for each website
                    for ii in range(budget_for_each_website):
                        best_arm = np.argmax((conversion_numerator+0.00001)/(conversion_denominator+0.00001))
                        prob = self.mu_true[j,best_arm]
                        if np.random.random()<prob:
                            conversion_numerator[best_arm] +=1
                            counter += 1
                        conversion_denominator[best_arm] +=1
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            for period in range(1,num_period):
                counter = 0
                #display ad
                for j in range(self.J):#for each website
                    for ii in range(budget_for_each_website):
                        best_arm = np.argmax((conversion_numerator+0.00001)/(conversion_denominator+0.00001)+np.sqrt(2*np.log(period*budget_for_each_period)/(conversion_numerator+1)))# add 1 to conversion_numerator to avoid the problem of deviding by zeros
                        prob = self.mu_true[j,best_arm]
                        if np.random.random()<prob:
                            conversion_numerator[best_arm] +=1
                            counter += 1
                        conversion_denominator[best_arm] +=1
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            return np.average(conversion)
        elif scheme == 'UCB1_Unpooled':
            conversion_numerator = np.zeros([self.J,self.K])
            conversion_denominator = np.zeros([self.J,self.K])


            num_period = self.simulation_time/self.period_length
            budget_for_each_period = self.Budget/num_period
            budget_for_each_website = budget_for_each_period/self.J
            conversion = np.empty([num_period])

            for period in range(1):
                counter = 0
                #display ad
                for j in range(self.J):#for each website
                    for ii in range(budget_for_each_website):
                        best_arm = np.argmax((conversion_numerator[j]+0.00001)/(conversion_denominator[j]+0.00001))
                        prob = self.mu_true[j,best_arm]
                        if np.random.random()<prob:
                            conversion_numerator[j,best_arm] +=1
                            counter += 1
                        conversion_denominator[j,best_arm] +=1
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            for period in range(1,num_period):
                counter = 0
                #display ad
                for j in range(self.J):#for each website
                    for ii in range(budget_for_each_website):
                        best_arm = np.argmax((conversion_numerator[j]+0.00001)/(conversion_denominator[j]+0.00001)+np.sqrt(2*np.log(period*budget_for_each_website)/(conversion_numerator[j]+1)))
                        prob = self.mu_true[j,best_arm]
                        if np.random.random()<prob:
                            conversion_numerator[j,best_arm] +=1
                            counter += 1
                        conversion_denominator[j,best_arm] +=1
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            return np.average(conversion)
        elif scheme == 'UCB1_Tuned_Pooled':
            conversion_numerator = np.zeros(self.K)#number of conversion
            conversion_denominator = np.zeros(self.K)#number of impression

            num_period = self.simulation_time/self.period_length
            budget_for_each_period = self.Budget/num_period
            budget_for_each_website = budget_for_each_period/self.J
            conversion = np.empty([num_period])

            for period in range(1):
                counter = 0
                #display ad
                for j in range(self.J):#for each website
                    for ii in range(budget_for_each_website):
                        best_arm = np.argmax((conversion_numerator+0.00001)/(conversion_denominator+0.00001))
                        prob = self.mu_true[j,best_arm]
                        if np.random.random()<prob:
                            conversion_numerator[best_arm] +=1
                            counter += 1
                        conversion_denominator[best_arm] +=1
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            for period in range(1,num_period):
                counter = 0
                #display ad
                for j in range(self.J):#for each website
                    for ii in range(budget_for_each_website):
                        variance = conversion_numerator/conversion_denominator*(1-conversion_numerator/conversion_denominator)
                        V_kt = variance + np.sqrt(2*np.log(period*budget_for_each_period)/(conversion_numerator+1))
                        min = np.minimum(V_kt,np.zeros_like(V_kt)+0.25)
                        best_arm = np.argmax((conversion_numerator+0.00001)/(conversion_denominator+0.00001)+np.sqrt(min*np.log(period*budget_for_each_period)/(conversion_numerator+1)))
                        prob = self.mu_true[j,best_arm]
                        if np.random.random()<prob:
                            conversion_numerator[best_arm] +=1
                            counter += 1
                        conversion_denominator[best_arm] +=1
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            return np.average(conversion)
        elif scheme == 'UCB1_Tuned_Unpooled':
            conversion_numerator = np.zeros([self.J,self.K])#number of conversion
            conversion_denominator = np.zeros([self.J,self.K])#number of impression

            num_period = self.simulation_time/self.period_length
            budget_for_each_period = self.Budget/num_period
            budget_for_each_website = budget_for_each_period/self.J
            conversion = np.empty([num_period])

            for period in range(1):
                counter = 0
                #display ad
                for j in range(self.J):#for each website
                    for ii in range(budget_for_each_website):
                        best_arm = np.argmax((conversion_numerator[j]+0.00001)/(conversion_denominator[j]+0.00001))
                        prob = self.mu_true[j,best_arm]
                        if np.random.random()<prob:
                            conversion_numerator[j,best_arm] +=1
                            counter += 1
                        conversion_denominator[j,best_arm] +=1
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            for period in range(1,num_period):
                counter = 0
                #display ad
                for j in range(self.J):#for each website
                    for ii in range(budget_for_each_website):
                        variance = conversion_numerator[j]/conversion_denominator[j]*(1-conversion_numerator[j]/conversion_denominator[j])
                        V_kt = variance + np.sqrt(2*np.log(period*budget_for_each_website)/(conversion_numerator[j]+1))
                        min = np.minimum(V_kt,np.zeros_like(V_kt)+0.25)
                        best_arm = np.argmax((conversion_numerator[j]+0.00001)/(conversion_denominator[j]+0.00001)+np.sqrt(min*np.log(period*budget_for_each_website)/(conversion_numerator[j]+1)))
                        prob = self.mu_true[j,best_arm]
                        if np.random.random()<prob:
                            conversion_numerator[j,best_arm] +=1
                            counter += 1
                        conversion_denominator[j,best_arm] +=1
                conversion[period] = counter
                print>>output, counter,np.average(conversion[0:period+1])
            return np.average(conversion)
        elif scheme == 'Gittins_Pooled':#TODO
            return 0
            pass
        elif scheme == 'Gittins_Unpooled':#TODO
            return 0
            pass
        ## bayesian logistic regressions

        # Set the number of data points and variables
        N = new_nn_train_array.shape[0]
        p = new_nn_train_array.shape[1]
        # randomly permute the data
        r = np.random.permutation(N)
        X = new_nn_train_array[r,:]
        y = new_nn_label_array[r]
        idx = np.arange(int(N/10))

        w_prior = np.zeros(p)
        H_prior = np.diag(np.ones(p))*0.001

        w_posterior, H_posterior = bl.fit_bayes_logistic(y[idx], X[idx, :], w_prior, H_prior)

        # Now make this posterior our new prior
        w_prior = copy.copy(w_posterior)
        H_prior = copy.copy(H_posterior)

        # get the logistic and moderated logistic probabilities
        test_p = np.array([x])
        bayes_prob_lin = bl.bayes_logistic_prob(test_p,w_posterior,H_posterior)

        if bayes_prob_lin[0] > 0.50:
            pred_br = label_1
        else:
            pred_br = label_0

        if pred_br == t:
Exemplo n.º 5
0
plt.title("Log-Unnormalised Posterior")
# Mark the grid point with the highest unnormalised log-posterior value.
j2 = np.argmax(log_joint)
wb = W[j2][:]
plt.scatter(wb[0], wb[1], c='red', s=100)
plt.grid()
pml.savefig("logreg_laplace_unnormalised_posterior.pdf", dpi=300)

# Plotting the Laplace approximation to the posterior
plt.figure(3)
# bayes_logistic usage notes: https://bayes-logistic.readthedocs.io/en/latest/usage.html
# Signature: bayes_logistic.fit_bayes_logistic(y, X, wprior, H, weights=None,
#                                              solver='Newton-CG', bounds=None, maxiter=100)
# The labels are flattened with ravel() rather than reshape((N * D)): the
# number of labels is unrelated to the feature dimension D, so the old
# reshape only worked when t.size happened to equal N * D.
wfit, hfit = bayes_logistic.fit_bayes_logistic(t.ravel(),
                                               X,
                                               np.zeros(D),
                                               np.identity(D) / alpha,
                                               weights=None,
                                               solver='Newton-CG',
                                               bounds=None,
                                               maxiter=100)
# wfit represents the posterior parameters (MAP estimate)
# hfit represents the posterior Hessian (Hessian of the negative log posterior
# evaluated at the MAP parameters); its inverse is used as the Gaussian covariance.
co = np.linalg.inv(hfit)
log_laplace_posterior = np.log(multivariate_normal.pdf(W, mean=wfit, cov=co))
plt.contour(xx, yy, -1 * log_laplace_posterior.reshape((n, n)), 30)
plt.scatter(wb[0], wb[1], c='red', s=100)
plt.title("Laplace Approximation to Posterior")
plt.grid()
pml.savefig("logreg_laplace_posterior.pdf", dpi=300)

# Plotting the predictive distribution for logistic regression
plt.figure(5)
def main():
    """Run the logistic-regression Laplace-approximation demo.

    Generates two 2-D Gaussian point clouds (seeded, so the run is
    reproducible), evaluates the log-likelihood and unnormalised
    log-posterior of a bias-free logistic model on a grid of weight vectors,
    fits a Laplace approximation with ``bayes_logistic``, and compares the
    plug-in, Monte Carlo and numerical predictive distributions.

    Each figure is written to a ``logregLaplaceGirolamiDemo_*.png`` file in
    the current working directory and all figures are shown at the end.
    """
    np.random.seed(135)

    # Creating data: N points per class around two different means.
    N = 30
    D = 2
    mu1 = np.hstack((np.ones((N, 1)), 5 * np.ones((N, 1))))
    mu2 = np.hstack((-5 * np.ones((N, 1)), np.ones((N, 1))))
    class1_std = 1
    class2_std = 1.1
    X_1 = np.add(class1_std * np.random.randn(N, 2), mu1)
    X_2 = np.add(2 * class2_std * np.random.randn(N, 2), mu2)
    X = np.vstack((X_1, X_2))
    # Column vector of labels: ones for the first class, zeros for the second.
    t = np.vstack((np.ones((N, 1)), np.zeros((N, 1))))

    # Coordinates of each class, reused by every scatter plot below.
    x_1, y_1 = X[np.where(t == 1)[0]].T
    x_2, y_2 = X[np.where(t == 0)[0]].T

    def scatter_classes(zero_class_size=20):
        """Scatter both classes on the current figure (red: t == 1, blue: t == 0)."""
        plt.scatter(x_1, y_1, c='red', s=20, marker='o')
        plt.scatter(x_2, y_2, c='blue', s=zero_class_size, marker='o')

    # Plotting data
    plt.figure(0)
    scatter_classes()

    # Plotting Predictions
    alpha = 100
    Range = 8
    step = 0.1
    xx, yy = np.meshgrid(np.arange(-Range, Range, step),
                         np.arange(-Range, Range, step))
    n = xx.shape[0]  # the grid is square, so one side length is enough
    # W doubles as the grid of candidate weight vectors and as the grid of
    # input points (Xgrid) on which predictions are drawn.
    W = np.hstack((xx.reshape((n * n, 1)), yy.reshape((n * n, 1))))
    Xgrid = W

    def predict_on_grid(w):
        """Return the logistic prediction for weights ``w`` at every grid point."""
        return 1.0 / (1 + np.exp(np.dot(-Xgrid, w)))

    ws = np.array([[3, 1], [4, 2], [5, 3], [7, 3]])
    col = ['black', 'red', 'green', 'blue']
    for w, colour in zip(ws, col):
        pred = predict_on_grid(w)
        plt.contour(xx, yy, pred.reshape((n, n)), 1, colors=colour)
    plt.title("data")
    plt.savefig("logregLaplaceGirolamiDemo_data.png", dpi=300)

    # Plot prior, likelihood, posterior
    f = np.dot(W, np.transpose(X))  # one row of logits per candidate weight vector
    log_prior = np.log(multivariate_normal.pdf(W,
                                               cov=(np.identity(D)) * alpha))

    log_like = np.dot(f, t) - np.sum(np.log(1 + np.exp(f)),
                                     1).reshape((n * n, 1))
    log_joint = log_like.reshape((n * n, 1)) + log_prior.reshape((n * n, 1))

    plt.figure(1)
    plt.contour(xx, yy, -1 * log_like.reshape((n, n)), 30)
    plt.title("Log-Likelihood")

    # Plotting points corresponding to chosen lines
    for label, (w, colour) in enumerate(zip(ws, col), start=1):
        plt.annotate(str(label), xy=(w[0], w[1]), color=colour)

    # Grid maximum-likelihood estimate and the direction it points in.
    j = np.argmax(log_like)
    wmle = W[j, :]
    slope = wmle[1] / wmle[0]

    plt.plot([0, 7.9], [0, 7.9 * slope])
    plt.grid()
    plt.savefig("logregLaplaceGirolamiDemo_LogLikelihood.png", dpi=300)

    # Plotting the log posterior (unnormalised)
    plt.figure(2)
    plt.contour(xx, yy, -1 * log_joint.reshape((n, n)), 30)
    plt.title("Log-Unnormalised Posterior")
    j2 = np.argmax(log_joint)
    wb = W[j2][:]
    plt.scatter(wb[0], wb[1], c='red', s=100)
    plt.grid()
    plt.savefig("logregLaplaceGirolamiDemo_LogUnnormalisedPosterior.png",
                dpi=300)

    # Plotting the Laplace approximation to posterior
    plt.figure(3)
    # bayes_logistic usage notes: https://bayes-logistic.readthedocs.io/en/latest/usage.html
    # Signature: bayes_logistic.fit_bayes_logistic(y, X, wprior, H, weights=None,
    #                                              solver='Newton-CG', bounds=None, maxiter=100)
    # The labels are flattened with ravel(); the previous reshape((N * D))
    # only worked because the feature dimension D equals the number of classes.
    wfit, hfit = bayes_logistic.fit_bayes_logistic(
        t.ravel(),
        X,
        np.zeros(D),
        np.identity(D) / alpha,
        weights=None,
        solver='Newton-CG',
        bounds=None,
        maxiter=100)
    # wfit represents the posterior parameters (MAP estimate)
    # hfit represents the posterior Hessian (Hessian of the negative log
    # posterior evaluated at the MAP parameters); its inverse is used as the
    # covariance of the Gaussian approximation.
    co = np.linalg.inv(hfit)
    log_laplace_posterior = np.log(
        multivariate_normal.pdf(W, mean=wfit, cov=co))
    plt.contour(xx, yy, -1 * log_laplace_posterior.reshape((n, n)), 30)
    plt.scatter(wb[0], wb[1], c='red', s=100)
    plt.title("Laplace Approximation to Posterior")
    plt.grid()
    plt.savefig(
        "logregLaplaceGirolamiDemo_LaplaceApproximationtoPosterior.png",
        dpi=300)

    # Plotting the predictive distribution for logistic regression
    plt.figure(5)
    pred = predict_on_grid(wfit)
    plt.contour(xx, yy, pred.reshape((n, n)), 30)
    scatter_classes(zero_class_size=40)
    plt.title("p(y=1|x, wMAP)")
    plt.savefig("logregLaplaceGirolamiDemo_preddistlogit.png", dpi=300)

    # Decision boundary for sampled w
    plt.figure(6)
    scatter_classes()
    predm = np.zeros((n * n, 1))
    s = 100
    for _ in range(s):
        # Draw weights from the Laplace posterior and plot their 0.5 contour.
        wsamp = np.random.multivariate_normal(mean=wfit, cov=co)
        pred = predict_on_grid(wsamp)
        predm = np.add(predm, pred.reshape((n * n, 1)))
        plt.contour(xx, yy, pred.reshape((n, n)), np.array([0.5]))
    plt.title("decision boundary for sampled w")
    plt.savefig("logregLaplaceGirolamiDemo_decisionboundarysampledw.png",
                dpi=300)

    # MC: average of the sampled predictions
    plt.figure(7)
    predm = predm / s
    plt.contour(xx, yy, predm.reshape((n, n)), 30)
    scatter_classes()
    plt.title("MC approx of p(y=1|x)")
    plt.savefig("logregLaplaceGirolamiDemo_MonteCarloApprox.png", dpi=300)

    # Numerical: predictive probability computed by bayes_logistic itself
    plt.figure(8)
    scatter_classes()
    pr = bayes_logistic.bayes_logistic_prob(Xgrid, wfit, hfit)
    plt.contour(xx, yy, pr.reshape((n, n)), 30)
    plt.title("numerical approx of p(y=1|x)")
    plt.savefig("logregLaplaceGirolamiDemo_logitprob.png", dpi=300)

    plt.show()