def test_mostSimilarUserIDs(self):
    """The 4 nearest neighbours of an active user come back in ranked order."""
    userID = 'Marcel Caraciolo'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(
        ['Leopoldo Pires', 'Steve Gates', 'Lorena Abreu', 'Penny Frewman'],
        recSys.mostSimilarUserIDs(userID, 4))
def test_user_no_preference_mostSimilarUserIDs(self):
    """A user with no recorded preferences still gets a neighbour list."""
    userID = "Maria Gabriela"
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(
        ["Leopoldo Pires", "Lorena Abreu", "Luciana Nunes", "Marcel Caraciolo"],
        recSys.mostSimilarUserIDs(userID, 4))
def test_local_estimatePreference(self):
    """An existing preference is returned as-is by estimatePreference."""
    userID = "Marcel Caraciolo"
    itemID = "Superman Returns"
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    # assertAlmostEquals is a deprecated alias; use assertAlmostEqual.
    self.assertAlmostEqual(
        3.5,
        recSys.estimatePreference(userID=userID, similarity=self.similarity,
                                  itemID=itemID))
def test_local_not_existing_capper_False_estimatePreference(self):
    """With capping disabled, an unseen item gets an uncapped estimate."""
    userID = "Leopoldo Pires"
    itemID = "You, Me and Dupree"
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    # assertAlmostEquals is a deprecated alias; use assertAlmostEqual.
    self.assertAlmostEqual(
        2.065394689,
        recSys.estimatePreference(userID=userID, similarity=self.similarity,
                                  itemID=itemID))
def test_semi_watched_allOtherItems(self):
    """A user who has rated some items gets back only the unrated ones."""
    userID = 'Leopoldo Pires'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    nearestN = self.neighbor.userNeighborhood(userID)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(['Just My Luck', 'You, Me and Dupree'],
                     recSys.allOtherItems(userID, nearestN))
def test_evaluate_RMSRecommenderEvaluator(self):
    """Step through the RMS evaluator by hand: split the model into training
    and test preferences, re-point the recommender at the training model, and
    accumulate the squared error over the test preferences."""
    evaluator = RMSRecommenderEvaluator()
    recommender = UserRecommender(self.model, self.similarity, self.neighbor, True)
    evaluationPercentage = 1.0
    trainingPercentage = 0.7
    numUsers = self.model.NumUsers()
    trainingUsers = {}
    testUserPrefs = {}
    self.total = 0
    self.diffs = 0.0
    for userID in self.model.UserIDs():
        if random() < evaluationPercentage:
            evaluator.processOneUser(trainingPercentage, trainingUsers,
                                     testUserPrefs, userID, self.model)
    total_training = sum([len([pref for pref in prefs])
                          for user, prefs in trainingUsers.iteritems()])
    total_testing = sum([len([pref for pref in prefs])
                         for user, prefs in testUserPrefs.iteritems()])
    #self.assertAlmostEqual(total_training/float(total_training+total_testing), 0.7)
    #self.assertAlmostEqual(total_testing/float(total_training+total_testing), 0.3)
    trainingModel = DictDataModel(trainingUsers)
    # assertEquals/assert_ are deprecated aliases (removed in Python 3.12).
    self.assertEqual(sorted(trainingModel.UserIDs()),
                     sorted([user for user in trainingUsers]))
    recommender.model = trainingModel
    self.assertEqual(recommender.model, trainingModel)
    for userID, prefs in testUserPrefs.iteritems():
        estimatedPreference = None
        for pref in prefs:
            try:
                estimatedPreference = recommender.estimatePreference(
                    userID=userID, similarity=self.similarity, itemID=pref)
            except Exception:
                # Best effort: the user/item may be absent from the training
                # model.  Was a bare except, which would also swallow
                # KeyboardInterrupt/SystemExit.
                pass
            if estimatedPreference is not None:
                estimatedPreference = evaluator.capEstimatePreference(
                    estimatedPreference)
                self.assertTrue(evaluator.minPreference <= estimatedPreference
                                <= evaluator.maxPreference)
                diff = prefs[pref] - estimatedPreference
                self.diffs += (diff * diff)
                self.total += 1
    result = sqrt(self.diffs / float(self.total))
def test_user_no_preference_mostSimilarUserIDs(self):
    """A user with no recorded preferences still gets a neighbour list."""
    userID = 'Maria Gabriela'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(
        ['Leopoldo Pires', 'Lorena Abreu', 'Luciana Nunes', 'Marcel Caraciolo'],
        recSys.mostSimilarUserIDs(userID, 4))
def test_non_watched_allOtherItems(self):
    """A user who has rated nothing gets every item in the model back."""
    userID = 'Maria Gabriela'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    nearestN = self.neighbor.userNeighborhood(userID)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(
        ['Lady in the Water', 'Snakes on a Plane', 'Just My Luck',
         'Superman Returns', 'You, Me and Dupree', 'The Night Listener'],
        recSys.allOtherItems(userID, nearestN))
def test_local_not_existing_rescorer_estimatePreference(self):
    """A rescorer (TanHScorer) changes the estimate for an unseen item."""
    userID = "Leopoldo Pires"
    itemID = "You, Me and Dupree"
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    scorer = TanHScorer()
    # assertAlmostEquals is a deprecated alias; use assertAlmostEqual.
    self.assertAlmostEqual(
        2.5761016605,
        recSys.estimatePreference(userID=userID, similarity=self.similarity,
                                  itemID=itemID, rescorer=scorer))
def test_local_not_existing_capper_False_estimatePreference(self):
    """With capping disabled, an unseen item gets an uncapped estimate."""
    userID = 'Leopoldo Pires'
    itemID = 'You, Me and Dupree'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    # assertAlmostEquals is a deprecated alias; use assertAlmostEqual.
    self.assertAlmostEqual(
        2.065394689,
        recSys.estimatePreference(userID=userID, similarity=self.similarity,
                                  itemID=itemID))
def test_local_estimatePreference(self):
    """An existing preference is returned as-is by estimatePreference."""
    userID = 'Marcel Caraciolo'
    itemID = 'Superman Returns'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    # assertAlmostEquals is a deprecated alias; use assertAlmostEqual.
    self.assertAlmostEqual(
        3.5,
        recSys.estimatePreference(userID=userID, similarity=self.similarity,
                                  itemID=itemID))
def test_local_not_existing_rescorer_estimatePreference(self):
    """A rescorer (TanHScorer) changes the estimate for an unseen item."""
    userID = 'Leopoldo Pires'
    itemID = 'You, Me and Dupree'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    scorer = TanHScorer()
    # assertAlmostEquals is a deprecated alias; use assertAlmostEqual.
    self.assertAlmostEqual(
        2.5761016605,
        recSys.estimatePreference(userID=userID, similarity=self.similarity,
                                  itemID=itemID, rescorer=scorer))
def test_non_watched_allOtherItems(self):
    """A user who has rated nothing gets every item in the model back."""
    userID = "Maria Gabriela"
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    nearestN = self.neighbor.userNeighborhood(userID)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(
        ["Lady in the Water", "Snakes on a Plane", "Just My Luck",
         "Superman Returns", "You, Me and Dupree", "The Night Listener"],
        recSys.allOtherItems(userID, nearestN))
def test_evaluate_RMSRecommenderEvaluator(self):
    """Step through the RMS evaluator by hand: split the model into training
    and test preferences, re-point the recommender at the training model, and
    accumulate the squared error over the test preferences."""
    evaluator = RMSRecommenderEvaluator()
    recommender = UserRecommender(self.model, self.similarity, self.neighbor, True)
    evaluationPercentage = 1.0
    trainingPercentage = 0.7
    numUsers = self.model.NumUsers()
    trainingUsers = {}
    testUserPrefs = {}
    self.total = 0
    self.diffs = 0.0
    for userID in self.model.UserIDs():
        if random() < evaluationPercentage:
            evaluator.processOneUser(trainingPercentage, trainingUsers,
                                     testUserPrefs, userID, self.model)
    total_training = sum([len([pref for pref in prefs])
                          for user, prefs in trainingUsers.iteritems()])
    total_testing = sum([len([pref for pref in prefs])
                         for user, prefs in testUserPrefs.iteritems()])
    #self.assertAlmostEqual(total_training/float(total_training+total_testing), 0.7)
    #self.assertAlmostEqual(total_testing/float(total_training+total_testing), 0.3)
    trainingModel = DictDataModel(trainingUsers)
    # assertEquals/assert_ are deprecated aliases (removed in Python 3.12).
    self.assertEqual(sorted(trainingModel.UserIDs()),
                     sorted([user for user in trainingUsers]))
    recommender.model = trainingModel
    self.assertEqual(recommender.model, trainingModel)
    for userID, prefs in testUserPrefs.iteritems():
        estimatedPreference = None
        for pref in prefs:
            try:
                estimatedPreference = recommender.estimatePreference(
                    userID=userID, similarity=self.similarity, itemID=pref)
            except Exception:
                # Best effort: the user/item may be absent from the training
                # model.  Was a bare except, which would also swallow
                # KeyboardInterrupt/SystemExit.
                pass
            if estimatedPreference is not None:
                estimatedPreference = evaluator.capEstimatePreference(
                    estimatedPreference)
                self.assertTrue(evaluator.minPreference <= estimatedPreference
                                <= evaluator.maxPreference)
                diff = prefs[pref] - estimatedPreference
                self.diffs += (diff * diff)
                self.total += 1
    result = sqrt(self.diffs / float(self.total))
def test_create_UserBasedRecommender(self):
    """The constructor stores model, similarity, neighborhood and capper."""
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(recSys.similarity, self.similarity)
    self.assertEqual(recSys.capper, True)
    self.assertEqual(recSys.neighborhood, self.neighbor)
    self.assertEqual(recSys.model, self.model)
def test_evaluate_at_not_enough_IRStatsRecommenderEvaluator(self):
    """When `at` is too large for every user, all IR statistics are None."""
    evaluator = IRStatsRecommenderEvaluator()
    recommender = UserRecommender(self.model, self.similarity, self.neighbor, True)
    result = evaluator.evaluate(recommender, self.model, 4, 1.0)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(result, {'nDCG': None, 'recall': None, 'f1Score': None,
                              'precision': None, 'fallOut': None})
def test_mostSimilarUserIDs(self):
    """The 4 nearest neighbours of an active user come back in ranked order."""
    userID = "Marcel Caraciolo"
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(
        ["Leopoldo Pires", "Steve Gates", "Lorena Abreu", "Penny Frewman"],
        recSys.mostSimilarUserIDs(userID, 4))
def test_mostSimilarUserIDs(self):
    """The 4 nearest neighbours of an active user come back in ranked order."""
    userID = 'Marcel Caraciolo'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(
        ['Leopoldo Pires', 'Steve Gates', 'Lorena Abreu', 'Penny Frewman'],
        recSys.mostSimilarUserIDs(userID, 4))
def test_semi_watched_allOtherItems(self):
    """A user who has rated some items gets back only the unrated ones."""
    userID = "Leopoldo Pires"
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    nearestN = self.neighbor.userNeighborhood(userID)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(["Just My Luck", "You, Me and Dupree"],
                     recSys.allOtherItems(userID, nearestN))
def test_User_IRStatsRecommenderEvaluator(self):
    """Smoke test: the IR-stats evaluator runs end-to-end without raising.

    NOTE(review): `result` is never asserted on — presumably the split is
    random so exact values vary; confirm whether a range check is possible.
    """
    evaluator = IRStatsRecommenderEvaluator()
    recommender = UserRecommender(self.model, self.similarity, self.neighbor, True)
    result = evaluator.evaluate(recommender, self.model, 2, 1.0)
def test_empty_mostSimilarUserIDs(self):
    """Asking for zero neighbours yields an empty list."""
    userID = 'Maria Gabriela'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual([], recSys.mostSimilarUserIDs(userID, 0))
def test_all_watched_allOtherItems(self):
    """A user who has rated everything has no 'other' items left."""
    userID = "Luciana Nunes"
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    nearestN = self.neighbor.userNeighborhood(userID)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual([], recSys.allOtherItems(userID, nearestN))
def test_evaluate_IRStatsRecommenderEvaluator(self):
    """Step through the IR-stats evaluator by hand: pick relevant items per
    user, rebuild a training model without them, recommend, and accumulate
    precision/recall/fall-out/nDCG, finishing with the F1 score."""
    evaluator = IRStatsRecommenderEvaluator()
    recommender = UserRecommender(self.model, self.similarity, self.neighbor, True)
    evaluationPercentage = 1.0
    relevanceThreshold = None
    at = 2
    irStats = {'precision': 0.0, 'recall': 0.0, 'fallOut': 0.0, 'nDCG': 0.0}
    irFreqs = {'precision': 0, 'recall': 0, 'fallOut': 0, 'nDCG': 0}
    nItems = self.model.NumItems()
    # assertEquals/assert_ are deprecated aliases (removed in Python 3.12).
    self.assertEqual(nItems, 6)
    for userID in self.model.UserIDs():
        if random() < evaluationPercentage:
            prefs = self.model.PreferencesFromUser(userID)
            if len(prefs) < 2 * at:
                # Really not enough prefs to meaningfully evaluate the user
                self.assertTrue(userID in ['Leopoldo Pires', 'Penny Frewman',
                                           'Maria Gabriela'])
                continue
            relevantItemIDs = []
            # List some most-preferred items that would count as most relevant results
            relevanceThreshold = relevanceThreshold if relevanceThreshold \
                else evaluator.computeThreshold(prefs)
            prefs = sorted(prefs, key=lambda x: x[1], reverse=True)
            self.assertEqual(max([pref[1] for pref in prefs]), prefs[0][1])
            for index, pref in enumerate(prefs):
                if index < at:
                    if pref[1] >= relevanceThreshold:
                        relevantItemIDs.append(pref[0])
            self.assertEqual(
                relevantItemIDs,
                [p[0] for p in sorted(
                    [pref for pref in prefs if pref[1] >= relevanceThreshold],
                    key=lambda x: x[1], reverse=True)[:at]])
            if len(relevantItemIDs) == 0:
                continue
            trainingUsers = {}
            for otherUserID in self.model.UserIDs():
                evaluator.processOtherUser(userID, relevantItemIDs,
                                           trainingUsers, otherUserID,
                                           self.model)
            trainingModel = DictDataModel(trainingUsers)
            recommender.model = trainingModel
            try:
                prefs = trainingModel.PreferencesFromUser(userID)
                if not prefs:
                    continue
            except Exception:
                # Excluded all prefs for the user. move on.  (Was a bare
                # except, which would also swallow KeyboardInterrupt.)
                continue
            recommendedItems = recommender.recommend(userID, at)
            self.assertTrue(len(recommendedItems) <= 2)
            intersectionSize = len([recommendedItem
                                    for recommendedItem in recommendedItems
                                    if recommendedItem in relevantItemIDs])
            # Precision
            if len(recommendedItems) > 0:
                irStats['precision'] += (intersectionSize /
                                         float(len(recommendedItems)))
                irFreqs['precision'] += 1
            # Recall
            irStats['recall'] += (intersectionSize / float(len(relevantItemIDs)))
            irFreqs['recall'] += 1
            # Fall-Out
            if len(relevantItemIDs) < len(prefs):
                irStats['fallOut'] += (len(recommendedItems) - intersectionSize) \
                    / float(nItems - len(relevantItemIDs))
                irFreqs['fallOut'] += 1
            # nDCG
            # In computing, assume relevant IDs have relevance 1 and others 0.
            cumulativeGain = 0.0
            idealizedGain = 0.0
            for index, recommendedItem in enumerate(recommendedItems):
                discount = 1.0 if index == 0 else 1.0 / evaluator.log2(index + 1)
                if recommendedItem in relevantItemIDs:
                    cumulativeGain += discount
                # Otherwise we are multiplying discount by relevance 0 so it
                # does nothing.  Ideally results would be ordered with all
                # relevant ones first, so this theoretical ideal list starts
                # with number of relevant items equal to the total number of
                # relevant items
                if index < len(relevantItemIDs):
                    idealizedGain += discount
            irStats['nDCG'] += float(cumulativeGain) / idealizedGain
            irFreqs['nDCG'] += 1
    for key in irFreqs:
        irStats[key] = irStats[key] / float(irFreqs[key])
    sum_score = irStats['precision'] + irStats['recall'] \
        if irStats['precision'] is not None and irStats['recall'] is not None \
        else None
    irStats['f1Score'] = None if not sum_score else \
        (2.0) * irStats['precision'] * irStats['recall'] / sum_score
def test_user_no_preference_mostSimilarUserIDs(self):
    """A user with no recorded preferences still gets a neighbour list."""
    userID = 'Maria Gabriela'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(
        ['Leopoldo Pires', 'Lorena Abreu', 'Luciana Nunes', 'Marcel Caraciolo'],
        recSys.mostSimilarUserIDs(userID, 4))
def test_empty_mostSimilarUserIDs(self):
    """Asking for zero neighbours yields an empty list."""
    userID = "Maria Gabriela"
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual([], recSys.mostSimilarUserIDs(userID, 0))
def test_recommend(self):
    """Recommendations for a partially-rated user are the unrated items."""
    userID = "Leopoldo Pires"
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(["Just My Luck", "You, Me and Dupree"],
                     recSys.recommend(userID, 4))
def test_full_recommend(self):
    """A user with no preference data gets no recommendations."""
    userID = "Maria Gabriela"
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual([], recSys.recommend(userID, 4))
def test_User_AvgDistanceRecSys(self):
    """Smoke test: average-absolute-difference evaluation runs end-to-end.

    NOTE(review): `result` is never asserted on — presumably the random
    training split makes the score nondeterministic; confirm.
    """
    evaluator = AverageAbsoluteDifferenceRecommenderEvaluator()
    recommender = UserRecommender(self.model, self.similarity, self.neighbor, True)
    result = evaluator.evaluate(recommender, self.model, 0.7, 1.0)
def test_full_recommend(self):
    """A user with no preference data gets no recommendations."""
    userID = 'Maria Gabriela'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual([], recSys.recommend(userID, 4))
def test_semi_recommend(self):
    """Requesting a single recommendation returns only the top item."""
    userID = 'Leopoldo Pires'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(['Just My Luck'], recSys.recommend(userID, 1))
def test_all_watched_allOtherItems(self):
    """A user who has rated everything has no 'other' items left."""
    userID = 'Luciana Nunes'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    nearestN = self.neighbor.userNeighborhood(userID)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual([], recSys.allOtherItems(userID, nearestN))
def test_empty_recommend(self):
    """A user who already rated everything gets no recommendations."""
    userID = "Marcel Caraciolo"
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual([], recSys.recommend(userID, 4))
def test_recommend(self):
    """Recommendations for a partially-rated user are the unrated items."""
    userID = 'Leopoldo Pires'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(['Just My Luck', 'You, Me and Dupree'],
                     recSys.recommend(userID, 4))
def test_semi_recommend(self):
    """Requesting a single recommendation returns only the top item."""
    userID = "Leopoldo Pires"
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(["Just My Luck"], recSys.recommend(userID, 1))
def test_empty_recommend(self):
    """A user who already rated everything gets no recommendations."""
    userID = 'Marcel Caraciolo'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual([], recSys.recommend(userID, 4))
def test_evaluate_IRStatsRecommenderEvaluator(self):
    """Step through the IR-stats evaluator by hand: pick relevant items per
    user, rebuild a training model without them, recommend, and accumulate
    precision/recall/fall-out/nDCG, finishing with the F1 score."""
    evaluator = IRStatsRecommenderEvaluator()
    recommender = UserRecommender(self.model, self.similarity, self.neighbor, True)
    evaluationPercentage = 1.0
    relevanceThreshold = None
    at = 2
    irStats = {'precision': 0.0, 'recall': 0.0, 'fallOut': 0.0, 'nDCG': 0.0}
    irFreqs = {'precision': 0, 'recall': 0, 'fallOut': 0, 'nDCG': 0}
    nItems = self.model.NumItems()
    # assertEquals/assert_ are deprecated aliases (removed in Python 3.12).
    self.assertEqual(nItems, 6)
    for userID in self.model.UserIDs():
        if random() < evaluationPercentage:
            prefs = self.model.PreferencesFromUser(userID)
            if len(prefs) < 2 * at:
                # Really not enough prefs to meaningfully evaluate the user
                self.assertTrue(userID in ['Leopoldo Pires', 'Penny Frewman',
                                           'Maria Gabriela'])
                continue
            relevantItemIDs = []
            # List some most-preferred items that would count as most relevant results
            relevanceThreshold = relevanceThreshold if relevanceThreshold \
                else evaluator.computeThreshold(prefs)
            prefs = sorted(prefs, key=lambda x: x[1], reverse=True)
            self.assertEqual(max([pref[1] for pref in prefs]), prefs[0][1])
            for index, pref in enumerate(prefs):
                if index < at:
                    if pref[1] >= relevanceThreshold:
                        relevantItemIDs.append(pref[0])
            self.assertEqual(
                relevantItemIDs,
                [p[0] for p in sorted(
                    [pref for pref in prefs if pref[1] >= relevanceThreshold],
                    key=lambda x: x[1], reverse=True)[:at]])
            if len(relevantItemIDs) == 0:
                continue
            trainingUsers = {}
            for otherUserID in self.model.UserIDs():
                evaluator.processOtherUser(userID, relevantItemIDs,
                                           trainingUsers, otherUserID,
                                           self.model)
            trainingModel = DictDataModel(trainingUsers)
            recommender.model = trainingModel
            try:
                prefs = trainingModel.PreferencesFromUser(userID)
                if not prefs:
                    continue
            except Exception:
                # Excluded all prefs for the user. move on.  (Was a bare
                # except, which would also swallow KeyboardInterrupt.)
                continue
            recommendedItems = recommender.recommend(userID, at)
            self.assertTrue(len(recommendedItems) <= 2)
            intersectionSize = len([recommendedItem
                                    for recommendedItem in recommendedItems
                                    if recommendedItem in relevantItemIDs])
            # Precision
            if len(recommendedItems) > 0:
                irStats['precision'] += (intersectionSize /
                                         float(len(recommendedItems)))
                irFreqs['precision'] += 1
            # Recall
            irStats['recall'] += (intersectionSize / float(len(relevantItemIDs)))
            irFreqs['recall'] += 1
            # Fall-Out
            if len(relevantItemIDs) < len(prefs):
                irStats['fallOut'] += (len(recommendedItems) - intersectionSize) \
                    / float(nItems - len(relevantItemIDs))
                irFreqs['fallOut'] += 1
            # nDCG
            # In computing, assume relevant IDs have relevance 1 and others 0.
            cumulativeGain = 0.0
            idealizedGain = 0.0
            for index, recommendedItem in enumerate(recommendedItems):
                discount = 1.0 if index == 0 else 1.0 / evaluator.log2(index + 1)
                if recommendedItem in relevantItemIDs:
                    cumulativeGain += discount
                # Otherwise we are multiplying discount by relevance 0 so it
                # does nothing.  Ideally results would be ordered with all
                # relevant ones first, so this theoretical ideal list starts
                # with number of relevant items equal to the total number of
                # relevant items
                if index < len(relevantItemIDs):
                    idealizedGain += discount
            irStats['nDCG'] += float(cumulativeGain) / idealizedGain
            irFreqs['nDCG'] += 1
    for key in irFreqs:
        irStats[key] = irStats[key] / float(irFreqs[key])
    sum_score = irStats['precision'] + irStats['recall'] \
        if irStats['precision'] is not None and irStats['recall'] is not None \
        else None
    irStats['f1Score'] = None if not sum_score else \
        (2.0) * irStats['precision'] * irStats['recall'] / sum_score
def test_non_watched_allOtherItems(self):
    """A user who has rated nothing gets every item in the model back."""
    userID = 'Maria Gabriela'
    recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
    nearestN = self.neighbor.userNeighborhood(userID)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(
        ['Lady in the Water', 'Snakes on a Plane', 'Just My Luck',
         'Superman Returns', 'You, Me and Dupree', 'The Night Listener'],
        recSys.allOtherItems(userID, nearestN))