def __init__(self, filename):
    """Build the vector space retriever and the user ranker.

    `filename` is passed unchanged to both `VectorSpace` and
    `TweetUserRanking`; the resulting objects are stored on the instance
    as `vsRanker` and `tweetRanker`.
    """
    # single-argument print(...) behaves the same on Python 2 and Python 3,
    # whereas the bare print statement is a syntax error on Python 3
    print('Creating Vector Space..')
    self.vsRanker = VectorSpace(filename)
    print('Ranking users...')
    self.tweetRanker = TweetUserRanking(filename)
class VectorSpaceAndUserRank:
    """Rank tweets by blending vector space similarity with a user rank score.

    Each result from the vector space retriever is scored as
    `beta * cosineSimilarity + (1 - beta) * rankScore`, where `rankScore`
    is the score the user ranker reports for the tweet's author.
    """

    # weight of the cosine similarity in the blended score; the user rank
    # score receives the remaining (1 - beta)
    beta = .4

    # object of the vector space retriever (set in __init__)
    vsRanker = ''

    # object of the user ranker (set in __init__)
    tweetRanker = ''

    def __init__(self, filename):
        """Build the vector space retriever and the user ranker.

        `filename` is passed unchanged to both `VectorSpace` and
        `TweetUserRanking`.
        """
        # single-argument print(...) behaves the same on Python 2 and 3
        print('Creating Vector Space..')
        self.vsRanker = VectorSpace(filename)
        print('Ranking users...')
        self.tweetRanker = TweetUserRanking(filename)

    def getUsersFromVsResults(self, vsResults):
        """Return the 'username' of every vector space result, in order.

        Usernames are not de-duplicated: a user with several matching
        tweets appears once per tweet.
        """
        return [result['username'] for result in vsResults]

    def processQuery(self, query, numCandidates=200, numResults=50):
        """Return the texts of the best tweets for `query`, best first.

        query         -- forwarded unchanged to the vector space retriever.
        numCandidates -- how many vector space results to fetch and re-rank
                         (default 200, the value previously hard-coded).
        numResults    -- maximum number of tweet texts returned
                         (default 50, the value previously hard-coded).

        Returns an empty list when the retriever yields no results.
        A username absent from the scores returned by
        `tweetRanker.getRankScoresForUsers` propagates as a lookup error.
        """
        vsResults = self.vsRanker.processQuery(query, numCandidates)
        if not vsResults:
            return []

        # look up the rank score of every author in the candidate set
        usersList = self.getUsersFromVsResults(vsResults)
        userScoreDict = self.tweetRanker.getRankScoresForUsers(usersList)

        # Both dicts are keyed by tweetId, so a tweetId that occurs more
        # than once keeps the score and text of its last occurrence.
        scoreByTweetId = {}
        textByTweetId = {}
        for vsResult in vsResults:
            rankScore = userScoreDict[vsResult['username']]
            # weighted sum of cosine similarity and the author's rank score
            finalScore = (vsResult['cosineSimilarity'] * self.beta
                          + (1 - self.beta) * rankScore)
            scoreByTweetId[vsResult['tweetId']] = finalScore
            textByTweetId[vsResult['tweetId']] = vsResult['text']

        # Highest blended score first. Keyword arguments are used because
        # the positional (cmp, key, reverse) form only exists on Python 2.
        rankedTweets = sorted(scoreByTweetId.items(),
                              key=itemgetter(1), reverse=True)

        return [textByTweetId[tweetId]
                for tweetId, _ in rankedTweets[:numResults]]

    def getVectorSpaceQueryResults(self, query, numResults):
        """Return the texts of the plain vector space results for `query`.

        No user rank blending is applied; `numResults` is forwarded to the
        vector space retriever and the order it returns is preserved.
        """
        results = self.vsRanker.processQuery(query, numResults)
        return [result['text'] for result in results]

    def getTopKUsers(self, K):
        """Return the first element of each of the user ranker's top-K entries.

        NOTE(review): entries are presumably (username, score) pairs --
        confirm against TweetUserRanking.getTopKUsers.
        """
        results = self.tweetRanker.getTopKUsers(K)
        return [result[0] for result in results]