def Valid(self, T, users_te, u2s_v, u2s_h, n_batch=10):
    """Evaluate the recommender on T consecutive batches of test users.

    For every batch, each registered predictor produces a ranked list of
    at most ``self.tau`` songs per user (songs already present in the
    user's visible history are skipped), ``self.GetStochasticRec`` turns
    the per-predictor lists into one recommendation, and the batch is
    scored with ``mAP``.  Every progress record is printed and also
    appended, one record per line, to ``output_eval.txt``.

    Args:
        T: number of batches to evaluate.
        users_te: sequence of test user ids; batch ``t`` is the slice
            ``users_te[t * n_batch:(t + 1) * n_batch]``.
        u2s_v: mapping user -> visible songs of that user.
        u2s_h: mapping passed through to ``mAP`` (presumably the hidden
            songs per user -- confirm against ``mAP``).
        n_batch: number of users per batch.
    """
    # time.clock() was removed in Python 3.8; fall back to it only on
    # interpreters that have no perf_counter (Python 2).
    timer = getattr(time, "perf_counter", None) or time.clock
    ave_AP = 0.0
    with open('output_eval.txt', 'w') as f:

        def report(message):
            # Mirror one record to the console and, newline-terminated,
            # to the log file so records do not run together.
            print(message)
            f.write(message + "\n")

        for t in range(T):
            rusers = users_te[t * n_batch:(t + 1) * n_batch]
            rec = []
            start = timer()
            for i, ru in enumerate(rusers):
                has_history = ru in u2s_v
                if has_history:
                    report("%d] scoring user %s with %d songs"
                           % (i, ru, len(u2s_v[ru])))
                else:
                    report("%d] scoring user %s with 0 songs" % (i, ru))
                fl()  # presumably flushes stdout -- defined elsewhere
                songs_sorted = []
                for p in self.predictors:
                    if has_history:
                        ssongs = MSD_util.sort_dict_dec(
                            p.Score(u2s_v[ru], self.all_songs))
                    else:
                        # No history to score against: use all_songs as is.
                        ssongs = list(self.all_songs)
                    cleaned_songs = []
                    for x in ssongs:
                        if len(cleaned_songs) >= self.tau:
                            break
                        # Never recommend a song the user already has.
                        if not has_history or x not in u2s_v[ru]:
                            cleaned_songs.append(x)
                    songs_sorted.append(cleaned_songs)
                rec.append(self.GetStochasticRec(songs_sorted, self.Gamma))
            cti = timer() - start
            report("Processed in %f secs" % cti)
            fl()
            # Evaluate the batch recommendations with MAP.
            map_cur = mAP(rusers, rec, u2s_h, self.tau)
            ave_AP += map_cur
            report("MAP(%d): %f (%f)" % (t, map_cur, ave_AP / (t + 1)))
            print("")  # blank separator line on the console
            fl()
        report("Done!")
    # The with-statement closes the log file; no explicit close() needed.
def Valid(self, users_te, u2s_v, u2s_h, n_batch=10):
    """Evaluate the recommender on all of ``users_te`` in a single pass.

    Each registered predictor produces a ranked list of at most
    ``self.tau`` songs per user (songs in the user's visible history are
    skipped); ``self.GetStochasticRec`` combines the per-predictor lists
    and the whole pass is scored with ``mAP``.  Progress goes to stdout.

    Args:
        users_te: sequence of test user ids; all of them are scored.
        u2s_v: mapping user -> visible songs of that user.
        u2s_h: mapping passed through to ``mAP`` (presumably the hidden
            songs per user -- confirm against ``mAP``).
        n_batch: unused; kept so existing callers keep working (batch
            slicing is disabled in this variant).
    """
    # time.clock() was removed in Python 3.8; fall back to it only on
    # interpreters that have no perf_counter (Python 2).
    timer = getattr(time, "perf_counter", None) or time.clock
    t = 1  # label of the single evaluation pass, as printed before
    ave_AP = 0.0
    rusers = users_te
    rec = []
    start = timer()
    for i, ru in enumerate(rusers):
        has_history = ru in u2s_v
        if has_history:
            print("%d] scoring user %s with %d songs"
                  % (i, ru, len(u2s_v[ru])))
        else:
            print("%d] scoring user %s with 0 songs" % (i, ru))
        fl()  # presumably flushes stdout -- defined elsewhere
        songs_sorted = []
        # The predictor loop had been commented out while the body still
        # referenced ``p``; iterate the predictors again so ``p`` is bound.
        for p in self.predictors:
            if has_history:
                ssongs = MSD_util.sort_dict_dec(
                    p.Score(u2s_v[ru], self.all_songs))
            else:
                ssongs = list(self.all_songs)
            cleaned_songs = []
            for x in ssongs:
                if len(cleaned_songs) >= self.tau:
                    break
                # Never recommend a song the user already has.
                if not has_history or x not in u2s_v[ru]:
                    cleaned_songs.append(x)
            songs_sorted.append(cleaned_songs)
        if has_history:
            print(songs_sorted)  # debug dump of the per-predictor lists
        rec.append(self.GetStochasticRec(songs_sorted, self.Gamma))
    cti = timer() - start
    print("Processed in %f secs" % cti)
    fl()
    # Evaluate the recommendations with MAP.
    map_cur = mAP(rusers, rec, u2s_h, self.tau)
    ave_AP += map_cur
    # Only one pass is run, so the running average is divided by 1
    # (the old ``t + 1`` divisor halved it).
    print("MAP(%d): %f (%f)" % (t, map_cur, ave_AP / 1))
    fl()
    print("Done!")
def Valid(self, T, users_te, u2s_v, u2s_h, n_batch, suffix):
    """Evaluate the recommender on T randomly drawn batches of test users.

    Before every batch ``users_te`` is shuffled IN PLACE (the caller's
    list is reordered) and the slice
    ``users_te[t * n_batch:(t + 1) * n_batch]`` is scored.  Each
    registered predictor yields at most ``self.tau`` songs per user
    (songs in the visible history are skipped), ``self.GetStochasticRec``
    combines them and ``mAP`` scores the batch.  One
    ``"<batch MAP>,<running average>"`` line per batch is written to
    ``'valid' + suffix + '.txt'``; the file is created after the first
    batch has been scored and is closed when the method exits.

    Args:
        T: number of batches to evaluate.
        users_te: mutable sequence of test user ids (shuffled in place).
        u2s_v: mapping user -> visible songs of that user.
        u2s_h: mapping passed through to ``mAP`` (presumably the hidden
            songs per user -- confirm against ``mAP``).
        n_batch: number of users per batch.
        suffix: string inserted into the loss file name.
    """
    # time.clock() was removed in Python 3.8; fall back to it only on
    # interpreters that have no perf_counter (Python 2).
    timer = getattr(time, "perf_counter", None) or time.clock
    ave_AP = 0.0
    valid_loss_txt = None
    try:
        for t in range(T):
            random.shuffle(users_te)
            rusers = users_te[t * n_batch:(t + 1) * n_batch]
            rec = []
            start = timer()
            for i, ru in enumerate(rusers):
                has_history = ru in u2s_v
                if has_history:
                    print("%d] scoring user %s with %d songs"
                          % (i, ru, len(u2s_v[ru])))
                else:
                    print("%d] scoring user %s with 0 songs" % (i, ru))
                fl()  # presumably flushes stdout -- defined elsewhere
                songs_sorted = []
                for p in self.predictors:
                    if has_history:
                        ssongs = MSD_util.sort_dict_dec(
                            p.Score(u2s_v[ru], self.all_songs))
                    else:
                        ssongs = list(self.all_songs)
                    cleaned_songs = []
                    for x in ssongs:
                        if len(cleaned_songs) >= self.tau:
                            break
                        # Never recommend a song the user already has.
                        if not has_history or x not in u2s_v[ru]:
                            cleaned_songs.append(x)
                    songs_sorted.append(cleaned_songs)
                rec.append(self.GetStochasticRec(songs_sorted, self.Gamma))
            cti = timer() - start
            print("Processed in %f secs" % cti)
            fl()
            # Evaluate the batch recommendations with MAP.
            map_cur = mAP(rusers, rec, u2s_h, self.tau)
            ave_AP += map_cur
            print("MAP(%d): %f (%f)" % (t + 1, map_cur, ave_AP / (t + 1)))
            fl()
            if valid_loss_txt is None:
                # Created lazily, only once the first batch has a score.
                valid_loss_txt = open('valid' + suffix + '.txt', 'w')
            valid_loss_txt.write("%s,%s\n" % (map_cur, ave_AP / (t + 1)))
    finally:
        # The handle used to be leaked; close it even if scoring fails.
        if valid_loss_txt is not None:
            valid_loss_txt.close()
    print("Done!")
def RecommendToUser(self, user, u2s_v):
    """Build a recommendation list for a single user.

    Every predictor in ``self.predictors`` contributes a list of at most
    ``self.tau`` songs that the user has not listened to yet; the lists
    are then handed to ``self.GetStochasticRec`` together with
    ``self.Gamma`` and its result is returned.

    Args:
        user: user id.
        u2s_v: mapping user -> songs the user already listened to.  A
            user missing from the mapping is treated as having no
            history (previously this raised ``KeyError``).

    Returns:
        Whatever ``self.GetStochasticRec`` returns for the per-predictor
        candidate lists.
    """
    has_history = user in u2s_v
    songs_sorted = []
    for p in self.predictors:
        if has_history:
            # Score's result is ordered by sort_dict_dec (presumably
            # songs by decreasing score -- confirm in MSD_util).
            ssongs = MSD_util.sort_dict_dec(
                p.Score(u2s_v[user], self.all_songs))
        else:
            # Nothing to score against: fall back to all_songs as is.
            ssongs = list(self.all_songs)
        cleaned_songs = []
        for x in ssongs:
            if len(cleaned_songs) >= self.tau:
                break
            # Only index u2s_v when the user is present, mirroring the
            # guard used by the Valid methods.
            if not has_history or x not in u2s_v[user]:
                cleaned_songs.append(x)
        songs_sorted.append(cleaned_songs)
    return self.GetStochasticRec(songs_sorted, self.Gamma)
def RecommendToUser(self, user, u2s_v):
    """Build a recommendation list for a single user.

    Args:
        user: user id.
        u2s_v: mapping user -> songs the user already listened to.  A
            user missing from the mapping is treated as having no
            history (previously this raised ``KeyError``).

    Returns:
        The result of ``self.GetStochasticRec`` for the per-predictor
        candidate lists.
    """
    has_history = user in u2s_v
    # songs_sorted holds one list of recommended songs per predictor.
    songs_sorted = []
    for p in self.predictors:
        if has_history:
            # Score returns a dict (song from all_songs -> score based on
            # the user's history); sort_dict_dec orders it.
            ssongs = MSD_util.sort_dict_dec(
                p.Score(u2s_v[user], self.all_songs))
        else:
            ssongs = list(self.all_songs)
        cleaned_songs = []
        for x in ssongs:
            if len(cleaned_songs) >= self.tau:
                break  # we only need tau songs for a recommendation
            # Do not recommend a song the user has already listened to.
            # u2s_v is indexed only when the user is present.
            if not has_history or x not in u2s_v[user]:
                cleaned_songs.append(x)
        songs_sorted.append(cleaned_songs)
    # Chooses a predictor based on the Gamma distribution and returns the
    # songs recommended by the chosen predictor (per the original note;
    # confirm against GetStochasticRec).
    return self.GetStochasticRec(songs_sorted, self.Gamma)
# Path of the output file (song ids come from kaggle_songs.txt).
osfile = "output.txt"

print("user_min: %d , user_max: %d" % (user_min, user_max))
# Flush so the message reaches the terminal right away instead of
# sitting in the stdout buffer.
sys.stdout.flush()

# TRIPLETS
# 48373586 training triplets, users disjoint from kaggle_visible.
f_triplets_tr = "train_triplets.txt"
# 1450933 triplets of new users, used for recommendation evaluation.
f_triplets_tev = "kaggle_visible_evaluation_triplets.txt"

print('loading users in %s' % "kaggle_users.txt")
sys.stdout.flush()
users_v = list(MSD_util.load_users("kaggle_users.txt"))

print('default ordering by popularity')
sys.stdout.flush()
# song_to_count builds a (song -> count) dict; sort_dict_dec orders it
# in decreasing order.
songs_ordered = MSD_util.sort_dict_dec(
    MSD_util.song_to_count(f_triplets_tr))

print("loading unique users indexes")
# unique_users returns the set of distinct users in the training triplets.
uu = MSD_util.unique_users(f_triplets_tr)
# Map every user id to a dense integer index.
u2i = {}
for i, u in enumerate(uu):
    u2i[u] = i

print('song to users on %s' % f_triplets_tr)
# song -> set of users who listened to that song.
s2u_tr = MSD_util.song_to_users(f_triplets_tr)

# Replace the user ids stored in s2u_tr by their integer indexes.
print("converting users to indexes")
for s in s2u_tr:
    s_set = set()
    for u in s2u_tr[s]:
        s_set.add(u2i[u])
    # NOTE(review): in the visible part of this script s_set is never
    # stored back into s2u_tr -- confirm the write-back follows.
# Triplet files.
# NOTE(review): training and evaluation point at the same file here --
# confirm this is intended.
f_triplets_tr = "kaggle_visible_evaluation_triplets.txt"
f_triplets_tev = "kaggle_visible_evaluation_triplets.txt"

print('loading users in %s ' % "kaggle_users.txt")
sys.stdout.flush()
users_v = list(MSD_util.load_users("kaggle_users.txt"))

print(' default ordering by popularity')
sys.stdout.flush()
# Songs ordered by sort_dict_dec over the per-song counts.
songs_ordered = MSD_util.sort_dict_dec(
    MSD_util.song_to_count(f_triplets_tr))

print('loading unique users indexes')
uu = MSD_util.unique_users(f_triplets_tr)
# Map every user id to its position in the enumeration of uu.
u2i = {}
for i, u in enumerate(uu):
    u2i[u] = i

print(' song to users on %s ' % f_triplets_tr)
s2u_tr = MSD_util.song_to_users(f_triplets_tr)

print(' converting users to indexes')
for s in s2u_tr:
    # Only the initialisation of the per-song set is visible here; the
    # rest of the conversion loop lies outside this fragment.
    s_set = set()
import sys

import MSD_util
import MSD_rec

# paths to data
f_triplets_tr = "../data/train_data.txt"
f_triplets_vv = "../data/valid_visible.txt"
f_triplets_vp = "../data/valid_predict.txt"

# parameters
_tau = 500  # number of songs recommended to every user

print('default ordering by popularity')
sys.stdout.flush()
songs_ordered = MSD_util.sort_dict_dec(
    MSD_util.song_to_count(f_triplets_tr, binary=False))

print('user to songs on %s' % f_triplets_vv)
u2s_vv = MSD_util.user_to_songs(f_triplets_vv)

print('user to songs on %s' % f_triplets_vp)
u2s_vp = MSD_util.user_to_songs(f_triplets_vp)

# Recommend the _tau most popular songs the user has not listened to yet
# (extremely unpersonalized :|).
#
# The list is built by walking songs_ordered so that the popularity
# ranking is preserved (going through a set scrambled the order) and so
# that songs already in the user's visible history are never used as
# padding.
all_recs = []
for u in u2s_vv:
    listened = u2s_vv[u]
    recs4u = []
    for song in songs_ordered:
        if len(recs4u) >= _tau:
            break
        if song not in listened:
            recs4u.append(song)
    all_recs.append(recs4u)
print("user_min: %d , user_max: %d" % (user_min, user_max))
sys.stdout.flush()

# TRIPLETS
f_triplets_tr = "kaggle_visible_evaluation_triplets.txt"
f_triplets_tev = "year1_valid_triplets_visible.txt"
f_triplets_teh = "year1_valid_triplets_hidden.txt"

# USERS
# One variable feeds both the log message and the loader, so the message
# can no longer name a different file than the one actually read (it used
# to announce kaggle_users.txt while loading user_valid.txt).
f_users = "user_valid.txt"

print('loading users in %s' % f_users)
sys.stdout.flush()
users_v = list(MSD_util.load_users(f_users))

print('default ordering by popularity')
sys.stdout.flush()
songs_ordered = MSD_util.sort_dict_dec(
    MSD_util.song_to_count(f_triplets_tr))

print("loading unique users indexes")
uu = MSD_util.unique_users(f_triplets_tr)
# Map every user id to its position in the enumeration of uu.
u2i = {}
for i, u in enumerate(uu):
    u2i[u] = i

print('song to users on %s' % f_triplets_tr)
s2u_tr = MSD_util.song_to_users(f_triplets_tr)

# Translate the user ids stored per song into their integer indexes.
print("converting users to indexes")
for s in s2u_tr:
    s_set = set()
    for u in s2u_tr[s]:
        s_set.add(u2i[u])
    # NOTE(review): in the visible part of this script s_set is never
    # stored back into s2u_tr -- confirm the write-back follows.
def main(argv):
    """Compute recommendations for a slice of users and save them.

    Args:
        argv: ``[user_min, user_max, result_file]``.  With fewer than
            three items a hint is printed and the defaults
            ``1, 110000, "resultfull.txt"`` are used instead.

    The users ``users_v[user_min:user_max]`` loaded from
    ``kaggle_users.txt`` are passed to a ``MSD_rec.SReco`` recommender
    holding one ``MSD_rec.PredSI`` predictor, and the result is written
    through ``MSD_util.save_recommendations``.
    """
    if len(argv) >= 3:
        user_min, user_max, osfile = argv[0], argv[1], argv[2]
    else:
        print(
            "Nee more arguments, Example:MSD_subm_rec.py user_min user_max resultFile.txt"
        )
        # Execution continues with defaults rather than exiting.
        user_min, user_max, osfile = 1, 110000, "resultfull.txt"
    user_min = int(user_min)
    user_max = int(user_max)
    print("user_min: %d , user_max: %d" % (user_min, user_max))
    sys.stdout.flush()

    # Triplet files.
    train_triplets = "train_triplets.txt"
    visible_triplets = "kaggle_visible_evaluation_triplets.txt"

    print('loading users in %s' % "kaggle_users.txt")
    sys.stdout.flush()
    users_v = list(MSD_util.load_users("kaggle_users.txt"))

    print('default ordering by popularity')
    sys.stdout.flush()
    song_counts = MSD_util.song_to_count(train_triplets)
    songs_ordered = MSD_util.sort_dict_dec(song_counts)

    print("loading unique users indexes")
    user_index = {
        user: idx
        for idx, user in enumerate(MSD_util.unique_users(train_triplets))
    }

    print('song to users on %s' % train_triplets)
    s2u_tr = MSD_util.song_to_users(train_triplets)

    # Swap the user ids stored per song for their integer indexes.
    print("converting users to indexes")
    for song in s2u_tr:
        s2u_tr[song] = {user_index[user] for user in s2u_tr[song]}
    del user_index  # no longer needed once the conversion is done

    print('user to songs on %s' % visible_triplets)
    u2s_v = MSD_util.user_to_songs(visible_triplets)

    print('Creating predictor..')
    _A = 0.15
    _Q = 3
    # Uncalibrated predictor.  The calibrated alternative noted in the
    # original was MSD_rec.PredSIc(s2u_tr, _A, _Q, "songs_scores.txt").
    predictor = MSD_rec.PredSI(s2u_tr, _A, _Q)

    print('Creating recommender..')
    recommender = MSD_rec.SReco(songs_ordered)
    recommender.Add(predictor)
    recommender.Gamma = [1.0]

    selected_users = users_v[user_min:user_max]
    recommendations = recommender.RecommendToUsers(selected_users, u2s_v)
    MSD_util.save_recommendations(recommendations, "kaggle_songs.txt", osfile)