示例#1
0
    def queryRanker(self):
        #Extract the high frequency queries from the training_queries
        HighFreqQueries = []
        training_queries = queryClass.load_queries(self.path_train, self.feature_count)
        test_queries = queryClass.load_queries(self.path_test, self.feature_count)
        #loop through all queries in the training set
        for index in training_queries.get_qids():
            highQuery = training_queries.get_query(index)
            #only keep the frequent queries 
            if(len(highQuery.__labels__) > self.minFreqCount):
                HighFreqQueries.append(highQuery)    
        print "found "+ str(len(HighFreqQueries)) + " high frequency queries"

        #build the query-ranker dictionary
        BestRanker = queryRankers()

        user_model = environment.CascadeUserModel(self.clickModel)
        evaluation2 = evaluation.NdcgEval()
        #test_queries = query.load_queries(sys.argv[2], feature_count)
        print "Read in training and testing queries"
        #for every query learn the best ranker and save it to the dictionary
        iter=0
        for highQuery in HighFreqQueries:
            ran=random.random()
            iter=iter+1
            if ran<self.threshold:
                print str(iter*100/len(HighFreqQueries))+"%"
                for i in xrange(self.rankersPerQuery):
                    learner = retrieval_system.ListwiseLearningSystem(self.feature_count, '-w random -c comparison.ProbabilisticInterleave -r ranker.ProbabilisticRankingFunction -s 3 -d 0.1 -a 0.01')
                    BestRanker.addInitRank(highQuery.get_qid(),learner.get_solution().w)
                    q = highQuery
                    for t in range(self.iterationCount):
                        l = learner.get_ranked_list(q)
                        c = user_model.get_clicks(l, q.get_labels())
                        s = learner.update_solution(c)
                        e = evaluation2.evaluate_all(s, test_queries)
                    
    
                    BestRanker.add(highQuery.get_qid(),learner.get_solution().w)
                    BestRanker.addList(highQuery.get_qid(),l)
                    BestRanker.addEval(highQuery.get_qid(),e)

        #save the dictionary to a file ('bestRanker.p')
        paths=self.path_train.split('/')
        name=paths[1]
        #pickle.dump(BestRanker, open( "QueryData/"+name+".data", "wb" ) )
        pickle.dump(BestRanker, open( "QueryData/"+self.dataset+str(self.iterationCount)+".data", "wb" ) )
        test = pickle.load( open( "QueryData/"+self.dataset+str(self.iterationCount)+".data", "rb" ) )
        print test.query_ranker.values()
示例#2
0
    def groupRanker(self):
        #Extract the high frequency queries from the training_queries
        clusterData=pickle.load(open( self.clusterDataPath, "rb" ) )
        queryData= self.queryData

        
        HighFreqQueries = []
        training_queries = queryClass.load_queries(self.path_train, self.feature_count)
        test_queries = queryClass.load_queries(self.path_test, self.feature_count)
        #loop through all queries in the training set
        

        #build the query-ranker dictionary
        BestRanker = queryRankers()

        user_model = environment.CascadeUserModel(self.clickModel)
        evaluation2 = evaluation.NdcgEval()
        #test_queries = query.load_queries(sys.argv[2], feature_count)
        print "Read in training and testing queries"
        #for every query learn the best ranker and save it to the dictionary
        iter=0
        learner=[0]*len(clusterData.clusterToRanker.keys())
        for cluster in clusterData.clusterToRanker:
            learner[cluster] = retrieval_system.ListwiseLearningSystem(self.feature_count, '-w random -c comparison.ProbabilisticInterleave -r ranker.ProbabilisticRankingFunction -s 3 -d 0.1 -a 0.01')  
        for t in range(self.iterationCount):
            q = training_queries[random.choice(training_queries.keys())]
            temp=(float(np.sum(clusterData.queryToCluster[q.get_qid()])))/(float(len(clusterData.queryToCluster[q.get_qid()])))
            temp=int(temp+0.5)
            cluster=temp
            #cluster=clusterData.queryToCluster[q.get_qid()][0]
            
            iter=iter+1
            if iter%200==0:
                print str(iter*100/self.iterationCount)+"%"
            l = learner[cluster].get_ranked_list(q)
            c = user_model.get_clicks(l, q.get_labels())
            s = learner[cluster].update_solution(c)
            #e = evaluation2.evaluate_all(s, test_queries)
        for cluster in clusterData.clusterToRanker:
             clusterData.clusterToRanker[cluster]=[learner[cluster].get_solution().w.tolist()]
      
            
        #save the dictionary to a file ('bestRanker.p')
        paths=self.path_train.split('/')
        name=paths[1]
        #pickle.dump(BestRanker, open( "QueryData/"+name+".data", "wb" ) )
        pickle.dump(clusterData, open( "ClusterData/"+self.dataset+".data", "wb" ) )
示例#3
0
import sys, random
try:
    import include
except:
    pass
import retrieval_system, environment, evaluation, query
import os
from queryRankers import *
import pickle

os.chdir("..")
os.chdir("..")
os.chdir("..")
#feature_count=136
rankerDict=queryRankers()
#feature_count=245
feature_count=64
learner = retrieval_system.ListwiseLearningSystem(feature_count, '-w random -c comparison.ProbabilisticInterleave -r ranker.ProbabilisticRankingFunction -s 3 -d 0.1 -a 0.01')
user_model = environment.CascadeUserModel('--p_click 0:0.0,1:1 --p_stop 0:0.0,1:0.0')
evaluation = evaluation.NdcgEval()
training_queries = query.load_queries(sys.argv[1], feature_count)
query_freq={}
for train in training_queries:
    
    if(len(train.__labels__) in query_freq):
        query_freq[len(train.__labels__)]=query_freq[len(train.__labels__)]+1
    else:
        query_freq[len(train.__labels__)]=1    
print query_freq                                 
test_queries = query.load_queries(sys.argv[2], feature_count)
for i in range(20):
示例#4
0
import sys, random
try:
    import include
except:
    pass
import retrieval_system, environment, evaluation, query
import os
from queryRankers import *
import pickle

# NOTE(review): assumes the script is started three directories below the
# project root where the data folders live - confirm against repo layout.
os.chdir("..")
os.chdir("..")
os.chdir("..")
#feature_count=136
# qid -> learned rankers container (populated later in the script).
rankerDict = queryRankers()
#feature_count=245
# Number of features per document vector in this dataset.
feature_count = 64
# Online listwise learner over feature_count weights.
learner = retrieval_system.ListwiseLearningSystem(
    feature_count,
    '-w random -c comparison.ProbabilisticInterleave -r ranker.ProbabilisticRankingFunction -s 3 -d 0.1 -a 0.01'
)
# Cascade click model; flags suggest clicks only on relevance-1 documents
# and never stopping - NOTE(review): semantics assumed, confirm.
user_model = environment.CascadeUserModel(
    '--p_click 0:0.0,1:1 --p_stop 0:0.0,1:0.0')
# NOTE(review): this rebinding shadows the imported `evaluation` module;
# kept because later (unseen) code may rely on the instance.
evaluation = evaluation.NdcgEval()
training_queries = query.load_queries(sys.argv[1], feature_count)
# Histogram: label-list length -> number of training queries (filled below).
query_freq = {}
for train in training_queries:

    if (len(train.__labels__) in query_freq):
        query_freq[len(
            train.__labels__)] = query_freq[len(train.__labels__)] + 1