def simulateArticlePool(self):
    """Build the article pool with feature vectors scaled to unit L2 norm.

    When ``self.ArticleGroups`` is greater than 1, the ids
    ``0 .. self.n_articles - 1`` are split into ``self.ArticleGroups``
    contiguous ranges; every article in group ``i`` gets a vector from
    ``featureUniform(self.dimension, {})`` multiplied element-wise by
    ``mask[i]`` (``mask`` comes from ``self.generateMasks()``).  Otherwise
    every article gets an unmasked ``featureUniform`` vector.

    Returns:
        list: ``Article(id, unit_feature_vector)`` objects in id order.
    """
    articles = []
    # Called unconditionally, exactly as before, even though the ungrouped
    # branch never reads the masks.
    mask = self.generateMasks()
    if self.ArticleGroups > 1:
        for i in range(self.ArticleGroups):
            # Floor division keeps the bounds integral: with true division
            # ("/" on Python 3) range() is handed floats and raises
            # TypeError.  For ints on Python 2 "//" and "/" agree.
            first_id = (self.n_articles * i) // self.ArticleGroups
            end_id = (self.n_articles * (i + 1)) // self.ArticleGroups
            for key in range(first_id, end_id):
                featureVector = np.multiply(
                    featureUniform(self.dimension, {}), mask[i])
                # NOTE(review): a mask that zeroes every component makes
                # l2_norm 0 and the division below produce NaNs - confirm
                # generateMasks() never returns an all-zero mask.
                l2_norm = np.linalg.norm(featureVector, ord=2)
                articles.append(Article(key, featureVector / l2_norm))
    else:
        for i in range(self.n_articles):
            featureVector = featureUniform(self.dimension, {})
            l2_norm = np.linalg.norm(featureVector, ord=2)
            articles.append(Article(i, featureVector / l2_norm))
    return articles
def initializeThetas(self, n, dims):
    """Return ``n`` preference vectors, each scaled to unit L2 norm.

    Args:
        n: number of vectors to create.
        dims: dimension handed to ``featureUniform``.

    Returns:
        list: ``n`` vectors, each the result of
        ``featureUniform(dims, argv={'l2_limit': 1})`` divided by its own
        L2 norm.
    """

    def unit_theta():
        # One draw, normalised so that its L2 norm is 1.
        raw = featureUniform(dims, argv={'l2_limit': 1})
        return raw / np.linalg.norm(raw, ord=2)

    return [unit_theta() for _ in range(n)]
def simulateArticlePool(self, n_articles):
    """Create ``n_articles`` articles with ids ``0 .. n_articles - 1``.

    Each article is built as
    ``Article(id, 0, self.iterations, featureUniform(self.dimension))``
    (the second and third arguments are the start and end times) and then
    has its ``theta`` attribute set to
    ``gaussianFeature(self.dimension, scaled=True)``.

    Args:
        n_articles: number of articles to generate.

    Returns:
        list: the generated articles in id order.
    """
    pool = []
    for article_id in range(n_articles):
        # Start time 0 and end time self.iterations for every article.
        article = Article(article_id, 0, self.iterations,
                          featureUniform(self.dimension))
        article.theta = gaussianFeature(self.dimension, scaled=True)
        pool.append(article)
    return pool
def initiateEnvironment(self):
    """Prepare the per-article drift vectors for the "evolveTheta" setting.

    Does nothing unless ``self.type == "evolveTheta"``.  In that case each
    article's ``testVars["deltaTheta"]`` is set to the direction from its
    current ``theta`` to a fresh ``featureUniform(self.dimension)`` sample,
    rescaled so that its norm equals ``self.environmentVars["stepSize"]``.
    """
    if self.type != "evolveTheta":
        return

    for article in self.articles:
        # Pick a random direction away from the current theta ...
        direction = featureUniform(self.dimension) - article.theta
        # ... and rescale it to the configured step size.
        unit_direction = direction / np.linalg.norm(direction)
        article.testVars["deltaTheta"] = (
            unit_direction * self.environmentVars["stepSize"])
def simulateArticlePool(self):
    """Build the article pool with feature vectors scaled to unit L2 norm.

    With ``self.ArticleGroups > 1`` the ids ``0 .. self.n_articles - 1`` are
    divided into ``self.ArticleGroups`` contiguous ranges, and the
    ``featureUniform(self.dimension, {})`` vector of every article in group
    ``i`` is multiplied element-wise by ``mask[i]`` from
    ``self.generateMasks()``.  With a single group no mask is applied.

    Returns:
        list: ``Article(id, unit_feature_vector)`` objects in id order.
    """
    articles = []
    # generateMasks() is invoked on both paths, as in the original code.
    mask = self.generateMasks()

    if self.ArticleGroups > 1:
        for group in range(self.ArticleGroups):
            # "//" instead of "/": true division produces floats, which
            # range() rejects with TypeError.  On ints under Python 2 the
            # two operators give the same result.
            lower = (self.n_articles * group) // self.ArticleGroups
            upper = (self.n_articles * (group + 1)) // self.ArticleGroups
            for key in range(lower, upper):
                featureVector = np.multiply(
                    featureUniform(self.dimension, {}), mask[group])
                # NOTE(review): an all-zero mask would give l2_norm == 0 and
                # NaN features here - confirm generateMasks() rules that out.
                l2_norm = np.linalg.norm(featureVector, ord=2)
                articles.append(Article(key, featureVector / l2_norm))
        return articles

    for key in range(self.n_articles):
        featureVector = featureUniform(self.dimension, {})
        l2_norm = np.linalg.norm(featureVector, ord=2)
        articles.append(Article(key, featureVector / l2_norm))
    return articles
def simulateArticlePool(self):
    """Build the masked article pool, one contiguous id range per group.

    The ids ``0 .. self.n_articles - 1`` are split into
    ``self.ArticleGroups`` contiguous ranges.  Every article in group ``i``
    receives ``featureUniform(self.dimension, {})`` multiplied element-wise
    by ``mask[i]`` (from ``self.generateMasks()``) and then divided by its
    L2 norm.

    Returns:
        list: ``Article(id, unit_feature_vector)`` objects in id order.
    """
    articles = []
    mask = self.generateMasks()
    for i in range(self.ArticleGroups):
        # Floor division keeps the bounds integral; with true division
        # ("/" on Python 3) range() receives floats and raises TypeError.
        # For ints on Python 2 the result is the same as before.
        first_id = (self.n_articles * i) // self.ArticleGroups
        end_id = (self.n_articles * (i + 1)) // self.ArticleGroups
        for key in range(first_id, end_id):
            featureVector = np.multiply(
                featureUniform(self.dimension, {}), mask[i])
            # NOTE(review): an all-zero mask makes l2_norm 0 and the
            # features NaN - confirm generateMasks() never produces one.
            l2_norm = np.linalg.norm(featureVector, ord=2)
            articles.append(Article(key, featureVector / l2_norm))
    return articles
def simulateArticlePool(self):
    """Build the masked article pool, one contiguous id range per group.

    Article ids ``0 .. self.n_articles - 1`` are partitioned into
    ``self.ArticleGroups`` contiguous ranges.  For group ``g`` each article's
    ``featureUniform(self.dimension, {})`` vector is multiplied element-wise
    by ``mask[g]`` (``mask`` is the result of ``self.generateMasks()``) and
    normalised to unit L2 norm.

    Returns:
        list: ``Article(id, unit_feature_vector)`` objects in id order.
    """
    articles = []
    mask = self.generateMasks()
    for group in range(self.ArticleGroups):
        # "//" rather than "/": true division hands floats to range(),
        # which raises TypeError.  Integer inputs on Python 2 are unaffected.
        lower = (self.n_articles * group) // self.ArticleGroups
        upper = (self.n_articles * (group + 1)) // self.ArticleGroups
        for key in range(lower, upper):
            featureVector = np.multiply(
                featureUniform(self.dimension, {}), mask[group])
            # NOTE(review): if a mask removes every component the norm is 0
            # and the division yields NaNs - confirm that cannot happen.
            l2_norm = np.linalg.norm(featureVector, ord=2)
            articles.append(Article(key, featureVector / l2_norm))
    return articles
def simulateUsers(self, numUsers):
    """Append ``numUsers`` new users to ``self.users``.

    Users get the ids ``0 .. numUsers - 1``; each one is built as
    ``User(id, featureUniform(self.dimension))``.

    Args:
        numUsers: how many users to create.
    """
    for user_id in range(numUsers):
        context = featureUniform(self.dimension)
        self.users.append(User(user_id, context))
def runAlgorithms(self, algorithms):
    """Run every algorithm against the simulated, changing environment.

    For each of ``self.testing_iterations`` rounds every user is served once
    by every algorithm.  The regret of a decision is the noisy optimal
    reward minus the noisy obtained reward (the same noise draw is used for
    both).  For algorithms whose ``CanEstimateUserPreference`` is true, the
    per-round mean of ``self.getL2Diff(u.theta, alg.getTheta(u.id))`` over
    all users is recorded as well.

    Once more than ``self.change_schedule`` rounds have passed since the
    last change point, each round replaces every user's ``theta`` with
    probability 0.5 (``random.random() > 0.5``).  The replacement is a
    unit-norm vector whose L2 distance to the old theta is not below 0.9.

    Side effects: sets ``self.startTime``, shuffles ``self.articles`` in
    place, overwrites ``u.theta`` on change points, calls
    ``self.batchRecord`` every ``self.batchSize`` rounds, writes two CSV
    files under ``save_address`` when ``self.Write_to_File`` is true, shows
    a matplotlib figure when ``self.Plot`` is true, and prints a summary.

    Args:
        algorithms: dict mapping an algorithm name to an object providing
            ``decide``, ``updateParameters``, ``CanEstimateUserPreference``
            and, when that flag is true, ``getTheta``.  Entries whose name
            contains ``'dLinUCB'`` must also expose ``users[0]`` with
            ``SwitchPoints``, ``newUCBs``, ``discardUCBs`` and
            ``ModelSelection``.

    Returns:
        None.
    """
    self.startTime = datetime.datetime.now()
    timeRun = self.startTime.strftime('_%m_%d_%H_%M')

    tim_ = []                # iterations at which a batch was recorded
    BatchCumlateRegret = {}  # name -> cumulative regret at every batch
    AlgRegret = {}           # name -> regret of every single decision
    ThetaDiffList = {}       # name -> mean theta error of every round
    ThetaDiff = {}           # name -> theta error summed over this round

    # Initialization
    userSize = len(self.users)
    for alg_name, alg in algorithms.items():
        AlgRegret[alg_name] = []
        BatchCumlateRegret[alg_name] = []
        if alg.CanEstimateUserPreference:
            ThetaDiffList[alg_name] = []

    if self.Write_to_File:
        # The output paths are only built when something is written.
        filenameWriteRegret = os.path.join(
            save_address, 'AccRegret' + timeRun + '.csv')
        filenameWritePara = os.path.join(
            save_address, 'ParameterEstimation' + timeRun + '.csv')
        with open(filenameWriteRegret, 'w') as f:
            f.write('Time(Iteration)')
            f.write(',' + ','.join(
                [str(alg_name) for alg_name in algorithms]))
            f.write('\n')
        with open(filenameWritePara, 'w') as f:
            f.write('Time(Iteration)')
            f.write(',' + ','.join(
                [str(alg_name) + 'Theta' for alg_name in ThetaDiffList]))
            f.write('\n')

    # Shuffle the candidate arm pool
    shuffle(self.articles)

    actual_changes = [0]       # iterations at which the thetas changed
    actual_changes_value = {}  # user id -> one marker per change point
    for u in self.users:
        actual_changes_value[u.id] = [1]

    for iter_ in range(self.testing_iterations):
        # The original code drew one noise value here for a per-arm reward
        # log that was never read.  The log is gone, but the draw is kept so
        # the sequence of random numbers seen by the rest of the run is
        # unchanged.
        self.noise()

        # Reset the per-round theta estimation error.
        for alg_name, alg in algorithms.items():
            if alg.CanEstimateUserPreference:
                ThetaDiff[alg_name] = 0

        # Simulate the changes
        if iter_ > (actual_changes[-1] + self.change_schedule):
            roll = random.random()
            if roll > 0.5:
                actual_changes.append(iter_)
                for u in self.users:
                    # Use the dimension of the current theta instead of a
                    # hard-coded 10; any other dimension used to fail on the
                    # subtraction below.
                    dims = len(u.theta)
                    while True:
                        new_theta_vector = featureUniform(
                            dims, argv={'l2_limit': 1})
                        l2_norm = np.linalg.norm(new_theta_vector, ord=2)
                        new_theta = new_theta_vector / l2_norm
                        # Resample while the new theta is closer than 0.9
                        # to the old one.
                        if not np.linalg.norm(new_theta - u.theta) < 0.9:
                            break
                    u.theta = new_theta
                    actual_changes_value[u.id].append(1)

        for u in self.users:
            self.regulateArticlePool()  # select random articles
            noise = self.noise()
            OptimalReward, _ = self.GetOptimalReward(u, self.articlePool)
            OptimalReward += noise
            for alg_name, alg in algorithms.items():
                # The algorithm observes the candidate pool and decides.
                pickedArticle = alg.decide(self.articlePool, u.id)
                # Feedback from the environment.
                reward = self.getReward(u, pickedArticle) + noise
                # Feed the observation back so the model can update.
                alg.updateParameters(pickedArticle, reward, u.id)
                # Calculate and record the regret.
                AlgRegret[alg_name].append(OptimalReward - reward)
                # Update the parameter estimation record.
                if alg.CanEstimateUserPreference:
                    ThetaDiff[alg_name] += self.getL2Diff(
                        u.theta, alg.getTheta(u.id))

        for alg_name, alg in algorithms.items():
            if alg.CanEstimateUserPreference:
                ThetaDiffList[alg_name] += [ThetaDiff[alg_name] / userSize]

        if iter_ % self.batchSize == 0:
            self.batchRecord(iter_)
            tim_.append(iter_)
            for alg_name in algorithms:
                BatchCumlateRegret[alg_name].append(
                    sum(AlgRegret[alg_name]))
            if self.Write_to_File:
                with open(filenameWriteRegret, 'a+') as f:
                    f.write(str(iter_))
                    f.write(',' + ','.join([
                        str(BatchCumlateRegret[alg_name][-1])
                        for alg_name in algorithms
                    ]))
                    f.write('\n')
                with open(filenameWritePara, 'a+') as f:
                    f.write(str(iter_))
                    f.write(',' + ','.join([
                        str(ThetaDiffList[alg_name][-1])
                        for alg_name in ThetaDiffList
                    ]))
                    f.write('\n')

    print("Actual change points: " + str(actual_changes))
    for alg_name in algorithms:
        if 'dLinUCB' in alg_name:
            detector = algorithms[alg_name].users[0]
            # Single-argument print() gives the same text on Python 2 and 3.
            print(' '.join([str(alg_name), 'Switch Points:',
                            str(detector.SwitchPoints)]))
            print(str(alg_name) + "New UCBS: " + str(detector.newUCBs))
            print(str(alg_name) + "Discarded UCBS: " +
                  str(detector.discardUCBs))

    # Length of the first dLinUCB model-selection trace.  Defaults to 0 so
    # that a run without any dLinUCB algorithm no longer hits a NameError.
    total = 0
    for alg_name, alg in algorithms.items():
        if 'dLinUCB' in alg_name:
            total = len(alg.users[0].ModelSelection)
            break

    # Report the change points that fall inside that trace.  actual_changes
    # is strictly increasing, so this visits the same entries, in the same
    # order, as scanning range(total) for members of actual_changes.
    for index, change_point in enumerate(actual_changes):
        if change_point < total:
            # Key 0 is the user id the original code looked up here.
            print('%s %s' % (index, actual_changes_value[0][index]))

    if self.Plot:  # only plot
        markerlist = ['*', 's', 'o', '*', 's']
        f, axa = plt.subplots(2, sharex=True)

        # plot the results
        for count, (alg_name, alg) in enumerate(algorithms.items()):
            # Cycle through the markers so that more than five algorithms
            # do not raise IndexError.
            marker = markerlist[count % len(markerlist)]
            axa[0].plot(tim_, BatchCumlateRegret[alg_name], linewidth=2,
                        marker=marker, markevery=400, label=alg_name)
            if alg.CanEstimateUserPreference:
                # NOTE(review): ThetaDiffList has one entry per iteration
                # while tim_ has one per batch; the lengths only match when
                # batchSize is 1 - confirm the intended x axis.
                axa[1].plot(tim_, ThetaDiffList[alg_name], linewidth=2,
                            marker=marker, markevery=400, label=alg_name)

        # The first line carries the legend label; the loop redraws it
        # together with the remaining change points.
        axa[0].axvline(actual_changes[0], color='r', linestyle='-',
                       linewidth=1.5, label='Actual Changes')
        for k in actual_changes:
            axa[0].axvline(k, color='r', linestyle='-', linewidth=1.5)

        for alg_name, alg in algorithms.items():
            if 'dLinUCB' in alg_name:
                detected = alg.users[0].newUCBs
                # Guarded: an algorithm that detected no change has an
                # empty list and used to raise IndexError here.
                if len(detected) > 0:
                    axa[0].axvline(detected[0], color='b', linestyle='-',
                                   linewidth=1.5,
                                   label='dLinUCB Detected Changes')
                for j in detected:
                    axa[0].axvline(j, color='b', linestyle='-',
                                   linewidth=1.5)

        axa[0].legend(loc='upper left', prop={'size': 10}, ncol=2)
        axa[0].set_ylabel("Regret", fontsize=22, fontweight='bold')
        axa[0].set_title("Accumulated Regret")

        axa[1].legend(loc='upper left', prop={'size': 10}, ncol=1)
        axa[1].set_xlabel("Iteration")
        axa[1].set_ylabel("L2 Diff")
        axa[1].set_title("Parameter estimation error")
        plt.xlabel("Iteration", fontsize=22, fontweight='bold')
        plt.show()

    # Final accumulated regret of every algorithm.
    for alg_name in algorithms:
        print('%s: %.2f' % (alg_name, BatchCumlateRegret[alg_name][-1]))