def queryRanker(self): #Extract the high frequency queries from the training_queries HighFreqQueries = [] training_queries = queryClass.load_queries(self.path_train, self.feature_count) test_queries = queryClass.load_queries(self.path_test, self.feature_count) #loop through all queries in the training set for index in training_queries.get_qids(): highQuery = training_queries.get_query(index) #only keep the frequent queries if(len(highQuery.__labels__) > self.minFreqCount): HighFreqQueries.append(highQuery) print "found "+ str(len(HighFreqQueries)) + " high frequency queries" #build the query-ranker dictionary BestRanker = queryRankers() user_model = environment.CascadeUserModel(self.clickModel) evaluation2 = evaluation.NdcgEval() #test_queries = query.load_queries(sys.argv[2], feature_count) print "Read in training and testing queries" #for every query learn the best ranker and save it to the dictionary iter=0 for highQuery in HighFreqQueries: ran=random.random() iter=iter+1 if ran<self.threshold: print str(iter*100/len(HighFreqQueries))+"%" for i in xrange(self.rankersPerQuery): learner = retrieval_system.ListwiseLearningSystem(self.feature_count, '-w random -c comparison.ProbabilisticInterleave -r ranker.ProbabilisticRankingFunction -s 3 -d 0.1 -a 0.01') BestRanker.addInitRank(highQuery.get_qid(),learner.get_solution().w) q = highQuery for t in range(self.iterationCount): l = learner.get_ranked_list(q) c = user_model.get_clicks(l, q.get_labels()) s = learner.update_solution(c) e = evaluation2.evaluate_all(s, test_queries) BestRanker.add(highQuery.get_qid(),learner.get_solution().w) BestRanker.addList(highQuery.get_qid(),l) BestRanker.addEval(highQuery.get_qid(),e) #save the dictionary to a file ('bestRanker.p') paths=self.path_train.split('/') name=paths[1] #pickle.dump(BestRanker, open( "QueryData/"+name+".data", "wb" ) ) pickle.dump(BestRanker, open( "QueryData/"+self.dataset+str(self.iterationCount)+".data", "wb" ) ) test = pickle.load( open( 
"QueryData/"+self.dataset+str(self.iterationCount)+".data", "rb" ) ) print test.query_ranker.values()
def groupRanker(self): #Extract the high frequency queries from the training_queries clusterData=pickle.load(open( self.clusterDataPath, "rb" ) ) queryData= self.queryData HighFreqQueries = [] training_queries = queryClass.load_queries(self.path_train, self.feature_count) test_queries = queryClass.load_queries(self.path_test, self.feature_count) #loop through all queries in the training set #build the query-ranker dictionary BestRanker = queryRankers() user_model = environment.CascadeUserModel(self.clickModel) evaluation2 = evaluation.NdcgEval() #test_queries = query.load_queries(sys.argv[2], feature_count) print "Read in training and testing queries" #for every query learn the best ranker and save it to the dictionary iter=0 learner=[0]*len(clusterData.clusterToRanker.keys()) for cluster in clusterData.clusterToRanker: learner[cluster] = retrieval_system.ListwiseLearningSystem(self.feature_count, '-w random -c comparison.ProbabilisticInterleave -r ranker.ProbabilisticRankingFunction -s 3 -d 0.1 -a 0.01') for t in range(self.iterationCount): q = training_queries[random.choice(training_queries.keys())] temp=(float(np.sum(clusterData.queryToCluster[q.get_qid()])))/(float(len(clusterData.queryToCluster[q.get_qid()]))) temp=int(temp+0.5) cluster=temp #cluster=clusterData.queryToCluster[q.get_qid()][0] iter=iter+1 if iter%200==0: print str(iter*100/self.iterationCount)+"%" l = learner[cluster].get_ranked_list(q) c = user_model.get_clicks(l, q.get_labels()) s = learner[cluster].update_solution(c) #e = evaluation2.evaluate_all(s, test_queries) for cluster in clusterData.clusterToRanker: clusterData.clusterToRanker[cluster]=[learner[cluster].get_solution().w.tolist()] #save the dictionary to a file ('bestRanker.p') paths=self.path_train.split('/') name=paths[1] #pickle.dump(BestRanker, open( "QueryData/"+name+".data", "wb" ) ) pickle.dump(clusterData, open( "ClusterData/"+self.dataset+".data", "wb" ) )
# NOTE(review): whitespace-mangled one-line Python 2 script (imports, setup of a
# listwise learner, and a query-frequency histogram). It is TRUNCATED: the final
# "for i in range(20):" has no loop body, so this chunk is not runnable as-is —
# recover the missing body from version control before reformatting.
import sys, random try: import include except: pass import retrieval_system, environment, evaluation, query import os from queryRankers import * import pickle os.chdir("..") os.chdir("..") os.chdir("..") #feature_count=136 rankerDict=queryRankers() #feature_count=245 feature_count=64 learner = retrieval_system.ListwiseLearningSystem(feature_count, '-w random -c comparison.ProbabilisticInterleave -r ranker.ProbabilisticRankingFunction -s 3 -d 0.1 -a 0.01') user_model = environment.CascadeUserModel('--p_click 0:0.0,1:1 --p_stop 0:0.0,1:0.0') evaluation = evaluation.NdcgEval() training_queries = query.load_queries(sys.argv[1], feature_count) query_freq={} for train in training_queries: if(len(train.__labels__) in query_freq): query_freq[len(train.__labels__)]=query_freq[len(train.__labels__)]+1 else: query_freq[len(train.__labels__)]=1 print query_freq test_queries = query.load_queries(sys.argv[2], feature_count) for i in range(20):
# NOTE(review): partially reformatted duplicate of the script chunk above,
# likewise collapsed onto one line and TRUNCATED mid-statement (the else branch
# of the frequency-count update and everything after it are missing). Not
# runnable as-is; appears to be a stale paste — confirm and remove or restore.
import sys, random try: import include except: pass import retrieval_system, environment, evaluation, query import os from queryRankers import * import pickle os.chdir("..") os.chdir("..") os.chdir("..") #feature_count=136 rankerDict = queryRankers() #feature_count=245 feature_count = 64 learner = retrieval_system.ListwiseLearningSystem( feature_count, '-w random -c comparison.ProbabilisticInterleave -r ranker.ProbabilisticRankingFunction -s 3 -d 0.1 -a 0.01' ) user_model = environment.CascadeUserModel( '--p_click 0:0.0,1:1 --p_stop 0:0.0,1:0.0') evaluation = evaluation.NdcgEval() training_queries = query.load_queries(sys.argv[1], feature_count) query_freq = {} for train in training_queries: if (len(train.__labels__) in query_freq): query_freq[len( train.__labels__)] = query_freq[len(train.__labels__)] + 1