def person_correlation_score(): """ Pearson Correlation Score - Best Fit Line Jack하고 Gene의 그래프와 Jack하고 Toby의 그래프를 비교하자 - Grade Inflation 을 극복한다 Chang 하고 Jack 을 비교해보자.. Euclidean Distance 사용시와 비교 """ display_critics('Jack Matthews', 'Chang') print 'Jack Matthews', 'Toby :', sim_pearson(critics, 'Jack Matthews', 'Toby') print 'Jack Matthews', 'Gene Seymour :', sim_pearson( critics, 'Jack Matthews', 'Gene Seymour') print 'Jack Matthews', 'Chang :', sim_pearson(critics, 'Jack Matthews', 'Chang')
def correlated_to_you(self, you):
    """Rank every user in ``self.prefs`` by Pearson correlation with ``you``.

    Args:
        you: Identifier of the reference user; it is converted with
            ``str`` before being passed to ``recommendations.sim_pearson``.

    Returns:
        A ``(top, bottom)`` tuple.  Both are lists of
        ``(correlation, user)`` pairs taken from the ranking sorted by
        descending correlation: ``top`` holds the first five entries and
        ``bottom`` the last five.  When fewer than five users exist, both
        lists contain every entry.
    """
    prefs = self.prefs
    me = str(you)
    # NOTE(review): ``you`` itself is not filtered out, so if it is a key of
    # ``prefs`` it is ranked together with everybody else - confirm intended.
    ranked = sorted(
        ((recommendations.sim_pearson(prefs, me, other), other)
         for other in prefs),
        reverse=True,
    )
    # A negative-index slice never wraps to a wrong start position, unlike
    # ``ranked[total - 5:total]`` which lost entries for 3 or 4 users.
    return (ranked[:5], ranked[-5:])
def getn_similarcritics(prefs, person, n=5):
    """Return the names of the ``n`` critics most similar to ``person``.

    Similarity is the value returned by ``recommendations.sim_pearson``.

    Args:
        prefs: Preference mapping whose keys are the critics to compare.
        person: The critic everybody else is compared with; skipped in the
            comparison.
        n: Maximum number of critics to return (default 5).

    Returns:
        A list of critic names (as ``str``) ordered from most to least
        similar.  Critics with equal scores are all kept, in the order
        ``prefs`` yields them.  The list is shorter than ``n`` when fewer
        critics are available.
    """
    scored = []
    for critic in prefs:
        if critic != person:
            name = str(critic)
            score = recommendations.sim_pearson(prefs, person, name)
            scored.append((score, name))
    # Sort on the score only; the sort is stable, so ties keep their
    # original order instead of being dropped.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [name for _, name in scored[:max(n, 0)]]
def real_you_favorite(self):
    """Ask for a favourite movie and rank the other movies against it.

    The entries of ``recommendations.transformPrefs(self.prefs)`` are
    listed with 1-based numbers, the user types one of those numbers, and
    every other entry is scored against the chosen one with
    ``recommendations.sim_pearson``.

    Returns:
        A ``(fav_title, top, bottom)`` tuple.  ``top`` and ``bottom`` are
        lists of ``(score, title)`` pairs from the ranking sorted by
        descending score: the first five and the last five entries.

    Raises:
        ValueError: If the typed text is not an integer.
        IndexError: If the number is outside the printed range.
    """
    print("List of movies :")
    prefs = recommendations.transformPrefs(self.prefs)
    movies = list(prefs)
    for idx, movie in enumerate(movies, 1):
        print("[%s] %s" % (idx, movie))

    fav = input("Select your most favorite movie")
    choice = int(fav)
    # Reject 0 and negatives explicitly; plain indexing would silently wrap
    # around to the end of the list.
    if not 1 <= choice <= len(movies):
        raise IndexError("movie number out of range: %s" % choice)
    fav_title = movies[choice - 1]

    scores = sorted(
        ((recommendations.sim_pearson(prefs, fav_title, other), other)
         for other in prefs if other != fav_title),
        reverse=True,
    )
    # The bottom five must include the very last (least similar) entry; the
    # previous ``scores[total-6:total-1]`` slice skipped it.
    return (fav_title, scores[:5], scores[-5:])
def getRecommendations(prefs, person_in, similarity='unknown', printOutput=False):
    '''
    Return a list of recommendations for ``person_in``.

    For every name returned by ``getCleanedNames(prefs, person_in)`` the
    selected similarity is computed against ``person_in`` and handed to
    ``createSum`` and ``createKSum``.  The value of ``createRecomm()`` is
    then converted with ``convertDict2List`` and returned.

    Parameters:
    - prefs       -> dictionary of values
    - person_in   -> person to get recommendations for
    - similarity  -> 'euclid' or 'pearson'; any other value (including the
                     default 'unknown') skips both similarity loops
    - printOutput -> True to print the sums and recommendation values

    Returns (as described by the original author):
    - sorted list; each entry is [recommendation value (float), key name]:
        [[0.6666666666666666, 'Radio Moscow'],
         [0.5588425208190283, 'Led Zeppelin'],
         [0.5563772968714236, 'Pink Floyd'],
         ...
    '''
    persons = getCleanedNames(prefs, person_in)

    # Euclidean-distance based similarity.
    if similarity == 'euclid':
        for person in persons:
            distance = recommendations.sim_euclid(prefs, person_in, person)
            dataDict_Sum = createSum(prefs, person, distance)
            dataDict_kSum = createKSum(prefs, person, distance)
        if printOutput:
            print ('Sumvalues: ' +str(dataDict_Sum))
            print ('Sumvalues of Korrelation: ' +str(dataDict_kSum))
            print ('Recommendationvalues: ' +str(createRecomm()))

    # Pearson-correlation based similarity.
    if similarity == 'pearson':
        for person in persons:
            distance = recommendations.sim_pearson(prefs, person_in, person)
            dataDict_Sum = createSum(prefs, person, distance)
            dataDict_kSum = createKSum(prefs, person, distance)
        if printOutput:
            print ('Sumvalues: ' +str(dataDict_Sum))
            print ('Sumvalues of Korrelation: ' +str(dataDict_kSum))
            print ('Recommendationvalues: ' +str(createRecomm()))

    # NOTE(review): createRecomm() takes no arguments, so it presumably reads
    # state accumulated by createSum/createKSum - confirm against the helpers.
    return convertDict2List(createRecomm())
# pearson_correlation:
# Uses the recommendations module to compute the Pearson correlation score
# for every ordered pair of distinct critics, then prints, for each critic,
# the critic with the highest score.
import recommendations
import operator

critics = recommendations.critics

# rating_pearson[a][b] is the Pearson score of critic a against critic b.
rating_pearson = {}
for person1 in critics:
    ratings = {}
    for person2 in critics:
        if person1 == person2:
            continue
        print(person1 + ', ' + person2)
        ratings[person2] = recommendations.sim_pearson(critics, person1, person2)
    rating_pearson[person1] = ratings

print(rating_pearson)

# Sort each critic's scores from highest to lowest and keep the first entry,
# i.e. the closest related person.
by_score = operator.itemgetter(1)
closest_person = []
for person, scores in rating_pearson.items():
    sorted_pearson = sorted(scores.items(), key=by_score, reverse=True)
    closest_person.append({person: sorted_pearson[0]})

print(closest_person)
# Split prefs into disjoint train (first 90%) and test (remaining 10%) sets.
# list() makes the keys shuffleable on Python 3 as well as Python 2.
movies = list(prefs.keys())
random.shuffle(movies)
# One split index for both slices, so no movie lands in both sets.  (The test
# slice used to start at the 10% mark and overlapped the training set.)
split_at = int(0.9 * len(movies))
movies_train, movies_test = movies[:split_at], movies[split_at:]
train = {m: prefs[m] for m in movies_train}
test = {m: prefs[m] for m in movies_test}

for movie in test:
    sim_distances = []
    sim_pearsons = []
    for other_movie in train:
        # Calculate distance using euclidean distance
        sim_distances.append((r.sim_distance(prefs, movie, other_movie), other_movie))
        # Calculate similarity using pearson
        sim_pearsons.append((r.sim_pearson(prefs, movie, other_movie), other_movie))

    # distance sort ascending
    # NOTE(review): if r.sim_distance returns a similarity (higher = closer)
    # rather than a raw distance, this picks the least similar movie - confirm.
    sim_distances.sort()
    # similarity sort descending
    sim_pearsons.sort(reverse=True)

    # select 1st top of the list
    sim_most_related_movie = sim_distances[0][1]
    pear_most_related_movie = sim_pearsons[0][1]

    # Compare
    print('Using euclidean distance : Actual movie = {}, Predicted movie = {}'.format(movie, sim_most_related_movie))
    print('Using pearson similarity : Actual movie = {}, Predicted movie = {}'.format(movie, pear_most_related_movie))
botList = sorted(list, key=lambda rating: rating[1], reverse=True)[-3:] #reversed order print "user", user, "------------------" #part 1 also print "most liked movies" print topList print "least liked movies" print botList correlated_list = [] with open("movielens/u.user", "r") as userlist: #find matching users for line in userlist: id, age, gender, occupation, zip = line.split("|") if id == substitute: continue current_cor = rec.sim_pearson(prefs, substitute, id) correlated_list.append([current_cor, id]) sorted_by_correlation = sorted( correlated_list) #[-5:] # sorted list of users correlated to substitute least_correlated_users = sorted_by_correlation[::-1][ -5:] # top 5 least correlated users: [::-1] reverses the list most_correlated_users = sorted_by_correlation[ -5:] # top 5 most correlated users print "most correlated users and their correlation value", most_correlated_users # part 2 print "least correlated users and their correlation value", least_correlated_users # part 2 top_matches = [] bot_matches = [] top_matches = rec.topMatches(prefs, substitute, 5) bot_matches = rec.botMatches(prefs, substitute, 5)
'''
Created on 2015-6-12

@author: XXYF18
'''
from recommendations import critics, sim_distance, sim_pearson, topMatches, getRecommendations
import pydelicious

# Fetch the popular 'programming' posts; the result is not used.
pydelicious.get_popular(tag='programming')

# Both similarity measures for the same pair of critics.
for measure in (sim_distance, sim_pearson):
    print(measure(critics, 'Lisa Rose', 'Gene Seymour'))

# Closest matches and recommendations for Toby, first with the default
# similarity and then with sim_distance.
print(topMatches(critics, 'Toby', n=3))
print(getRecommendations(critics, 'Toby'))
print(getRecommendations(critics, 'Toby', similarity=sim_distance))
def testSanity(self):
    """Check that ``myRec.sim_pearson`` matches ``recommendations.sim_pearson``.

    Both implementations are called on ``recommendations.critics`` for the
    pair 'Lisa Rose' / 'Gene Seymour' and must return equal values.
    """
    critics = recommendations.critics
    reference = recommendations.sim_pearson(critics, "Lisa Rose", "Gene Seymour")
    candidate = myRec.sim_pearson(critics, "Lisa Rose", "Gene Seymour")
    self.assertEqual(reference, candidate)
import recommendations

# TODO: render the output as a table
critics = recommendations.critics

# For every critic, print the name followed by each other critic and the
# Pearson score between the two, then a separator line.
for item in critics:
    print(item)
    for value in critics:
        if item != value:
            print(value)
            result = recommendations.sim_pearson(critics, item, value)
            print(result)
    print('-------')

# Single reference pair.
result = recommendations.sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')
print(result)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 23 07:49:55 2017

@author: andres
"""
from recommendations import critics, sim_distance, sim_pearson

# Print every entry yielded by iterating over critics.
for critic in critics:
    print(critic)

# Exercise both similarity functions on one pair; the return values are
# discarded.
for measure in (sim_distance, sim_pearson):
    measure(critics, 'Lisa Rose', 'Gene Seymour')
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2017/2/26 16:19
# @Author : Shuqi.qin
# @Site :
# @File : test.py
# @Software: PyCharm
import recommendations as rc

critics = rc.critics

# Similarity of one pair of critics under both measures.
res1 = rc.sim_distance(critics, "Lisa Rose", "Gene Seymour")
res2 = rc.sim_pearson(critics, "Lisa Rose", "Gene Seymour")
# Three closest matches and person-based recommendations for Toby.
res3 = rc.topMatches(critics, "Toby", n=3)
res4 = rc.getRecommendationsBaseOnPersonSimilarity(critics, "Toby")
print('over')

# Imported only after the local computations have finished.
import pydelicious
print(pydelicious.get_popular(tag='programming'))
def test_Pearson(self):
    """Check ``sim_pearson`` for Lisa Rose vs Gene Seymour against the reference value."""
    score = recommendations.sim_pearson(
        recommendations.critics, 'Lisa Rose', 'Gene Seymour')
    # Compare with a tight tolerance instead of exact equality: a float
    # result can differ in its last digits with summation order or platform.
    self.assertAlmostEqual(score, 0.39605901719066977, places=12)
import recommendations

critics = recommendations.critics
others = ('lin', 'michael', 'mick')

# User-to-user similarity of 'ming' against three other users.
print("distance")
for other in others:
    print(recommendations.sim_distance(critics, 'ming', other))

print("pearson")
for other in others:
    print(recommendations.sim_pearson(critics, 'ming', other))

print("top match")
print(recommendations.topMatches(critics, 'ming', n=3))

print("recommendation")
print(recommendations.getRecommendations(critics, 'ming'))

# The same calls on the transformed preferences.
print("recommend items")
items = recommendations.transformPrefs(critics)
print(recommendations.topMatches(items, 'lady in the water', n=3))
print(recommendations.sim_pearson(items, 'lady in the water', 'snake on a plane'))

# Item similarity, first with the default measure and then with Pearson.
print("item similarity use distance")
print(recommendations.calculateSimilarItems(critics))

print('item similarity use pearson')
print(recommendations.calculateSimilarItems(critics, similarity=recommendations.sim_pearson))

print('recommendation base on items')
itemsim = recommendations.calculateSimilarItems(critics)
def test_Pearson(self):
    """Check ``sim_pearson`` for Lisa Rose vs Gene Seymour against the reference value."""
    score = recommendations.sim_pearson(
        recommendations.critics, 'Lisa Rose', 'Gene Seymour')
    # Compare with a tight tolerance instead of exact equality: a float
    # result can differ in its last digits with summation order or platform.
    self.assertAlmostEqual(score, 0.39605901719066977, places=12)
from recommendations import (
    critics,
    sim_distance,
    sim_pearson,
    pearsonTopMatches,
    distanceTopMatches,
    getRecommendations,
)

print(critics['Lisa Rose']['Lady in the Water'])

# Overwrite one of Toby's ratings in the shared critics data, then show it.
critics['Toby']['Snakes on a Plane'] = 5.0
print(critics['Toby'])

# Both similarity scores for the same pair, each printed as a labelled dict.
result = {
    'euclidean distance socre': sim_distance(critics, 'Lisa Rose', 'Gene Seymour')
}
print(result)
result = {
    'Pearson correlation score': sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')
}
print(result)

print(distanceTopMatches(critics, 'Toby', n=3))
print(pearsonTopMatches(critics, 'Toby', n=3))

# Recommendations for Toby: default similarity, then sim_distance.
rule = '-------------------------------------'
print(rule)
print(getRecommendations(critics, 'Toby'))
print(rule)
print(rule)
print(getRecommendations(critics, 'Toby', similarity=sim_distance))
# encoding: utf-8
'''
Created on 2015-07-19

@author: HCY
'''
from recommendations import critics, sim_distance, sim_pearson
import recommendations

print(critics['Hcy'])

# Both similarity measures for one pair of critics.
for measure in (sim_distance, sim_pearson):
    print(measure(critics, 'Lisa Rose', 'Gene Seymour'))

# Three closest matches for 'Hcy': Pearson first, then distance.
for measure in (sim_pearson, sim_distance):
    print(recommendations.topMatches(critics, 'Hcy', n=3, similarity=measure))

print(recommendations.getRecommendations(critics, 'Hcy'))
from recommendations import critics, sim_distance, sim_pearson

# Pearson score between Mick LaSalle and Toby.
score = sim_pearson(critics, 'Mick LaSalle', 'Toby')
print(score)
if __name__ == '__main__':
    # stupid little demo
    import recommendations

    demo_users = ('thakis', 'ytamshg', 'tubanator')
    # Pairs compared below, in output order.
    demo_pairs = (
        ('thakis', 'ytamshg'),
        ('thakis', 'tubanator'),
        ('tubanator', 'ytamshg'),
    )

    print('getting users...')
    users = initializeUserDict('programming')
    # Add the demo users with empty entries before the posts are filled in.
    for name in demo_users:
        users[name] = {}

    print('getting their posts (takes a while) ...')
    fillItems(users)

    print('Distances:')
    for first, second in demo_pairs:
        print(recommendations.sim_distance(users, first, second))

    print('Pearsons:')
    for first, second in demo_pairs:
        print(recommendations.sim_pearson(users, first, second))

    # At most the first ten recommendations per demo user.
    print('Recommendations:')
    for name in demo_users:
        print(recommendations.getRecommendations(users, name)[0:10])
# Disabled del.icio.us experiment, kept for reference:
#import pydelicious
#pydelicious.get_popular(tag='programming')
#from deliciousrec import *
#delusers=initializeUserDict('programming')
#delusers['tsegaran']={}
#fillIteams(delusers)
import recommendations

critics = recommendations.critics

#print recommendations.critics['Lisa Rose']['Lady in the Water']
# Ratings of the two critics compared below.
for name in ('Lisa Rose', 'Gene Seymour'):
    print(critics[name])

# Their similarity under both measures.
for measure in (recommendations.sim_distance, recommendations.sim_pearson):
    print(measure(critics, 'Lisa Rose', 'Gene Seymour'))

# Three closest matches for Toby: default similarity, then sim_distance.
print(recommendations.topMatches(critics, 'Toby', n=3))
print(recommendations.topMatches(critics, 'Toby', n=3, similarity=recommendations.sim_distance))

# The remaining calls are executed but their results are not printed.
recommendations.getRecommendations(critics, 'Toby')
recommendations.getRecommendations(critics, 'Toby', similarity=recommendations.sim_distance)

movies = recommendations.transformPrefs(critics)
#print movies
recommendations.topMatches(movies, 'Superman Returns')
return tag_map  # tail of a function whose definition starts outside this view

if __name__ == '__main__':
    # stupid little demo
    import recommendations

    print 'getting users...'
    users = initializeUserDict('programming')
    # Add three users with empty entries before fillItems is called.
    users['thakis'] = {}
    users['ytamshg'] = {}
    users['tubanator'] = {}

    print 'getting their posts (takes a while) ...'
    fillItems(users)

    # sim_distance for each pair of the three added users.
    print 'Distances:'
    print recommendations.sim_distance(users, 'thakis', 'ytamshg')
    print recommendations.sim_distance(users, 'thakis', 'tubanator')
    print recommendations.sim_distance(users, 'tubanator', 'ytamshg')

    # sim_pearson for the same pairs.
    print 'Pearsons:'
    print recommendations.sim_pearson(users, 'thakis', 'ytamshg')
    print recommendations.sim_pearson(users, 'thakis', 'tubanator')
    print recommendations.sim_pearson(users, 'tubanator', 'ytamshg')

    # At most the first ten recommendations for each of the three users.
    print 'Recommendations:'
    print recommendations.getRecommendations(users, 'thakis')[0:10]
    print recommendations.getRecommendations(users, 'ytamshg')[0:10]
    print recommendations.getRecommendations(users, 'tubanator')[0:10]
#!/usr/bin/env python
import recommendations

# Pearson score between Lisa Rose and Gene Seymour.
critics = recommendations.critics
sim_pearson_result = recommendations.sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')
print(sim_pearson_result)
def test_sim_pearson(self):
    """Check ``rec.sim_pearson`` on the fixture prefs and on ``rec.critics``.

    'fulano' vs 'beltrano' in ``self.prefs`` is expected to score 1.0, and
    'Lisa Rose' vs 'Gene Seymour' in ``rec.critics`` 0.396059017191; both
    are compared with ``assertAlmostEqual``.
    """
    fixture_score = rec.sim_pearson(self.prefs, 'fulano', 'beltrano')
    self.assertAlmostEqual(fixture_score, 1.0)

    critics_score = rec.sim_pearson(rec.critics, 'Lisa Rose', 'Gene Seymour')
    self.assertAlmostEqual(critics_score, 0.396059017191)