def Valid(self, T, users_te, u2s_v, u2s_h, n_batch=10):
    """Score test users batch by batch and log a running average of the MAP.

    For every user of every batch, each predictor in ``self.predictors``
    contributes a list of at most ``self.tau`` songs the user has not
    already listened to; ``self.GetStochasticRec`` then turns those
    per-predictor lists into the user's recommendation. After each batch
    the recommendations are evaluated with ``mAP`` and the running average
    is reported.

    Progress is printed to stdout and mirrored, one entry per line, into
    ``output_eval.txt`` (overwritten on every call).

    Args:
        T: number of batches to process.
        users_te: sequence of test users; batch ``t`` is the slice
            ``users_te[t*n_batch:(t+1)*n_batch]``.
        u2s_v: mapping from user to the songs that user is known to have
            listened to. Users missing from it are treated as having no
            history.
        u2s_h: forwarded untouched to ``mAP`` (presumably the held-out
            listening data used as ground truth -- confirm against ``mAP``).
        n_batch: number of users per batch.

    Returns:
        None. Results are only printed and written to the log file.
    """
    # time.clock() was removed in Python 3.8. Prefer perf_counter() and fall
    # back to time.time() on interpreters that predate it.
    timer = getattr(time, "perf_counter", time.time)
    ave_AP = 0.0
    with open('output_eval.txt', 'w') as f:
        for t in range(T):
            rusers = users_te[t * n_batch:(t + 1) * n_batch]
            rec = []
            start = timer()
            for i, ru in enumerate(rusers):
                known = ru in u2s_v
                # An empty history makes the "already listened" filter below
                # a no-op for users without visible data.
                history = u2s_v[ru] if known else ()
                msg = "%d] scoring user %s with %d songs" % (i, ru, len(history))
                print(msg)
                f.write(msg + "\n")
                fl()
                songs_sorted = []
                for p in self.predictors:
                    if known:
                        ssongs = MSD_util.sort_dict_dec(
                            p.Score(history, self.all_songs))
                    else:
                        # No history to score against: use the catalogue order.
                        ssongs = list(self.all_songs)
                    cleaned_songs = []
                    for x in ssongs:
                        if len(cleaned_songs) >= self.tau:
                            break  # tau songs per predictor are enough
                        if x not in history:
                            cleaned_songs.append(x)
                    songs_sorted.append(cleaned_songs)
                rec.append(self.GetStochasticRec(songs_sorted, self.Gamma))
            cti = timer() - start
            msg = "Processed in %f secs" % cti
            print(msg)
            fl()
            f.write(msg + "\n")
            # Evaluate this batch's recommendations with the MAP metric.
            map_cur = mAP(rusers, rec, u2s_h, self.tau)
            ave_AP += map_cur
            msg = "MAP(%d): %f (%f)" % (t, map_cur, ave_AP / (t + 1))
            print(msg)
            # Blank separator line; a bare `print` is a no-op on Python 3.
            print("")
            fl()
            f.write(msg + "\n")
        print("Done!")
        f.write("Done!\n")
def RecommendToUser(self, user, u2s_v):
    """Build a recommendation for ``user`` from every predictor's ranking.

    Each predictor in ``self.predictors`` contributes a list of at most
    ``self.tau`` songs that ``user`` has not already listened to. The
    per-predictor lists are handed to ``self.GetStochasticRec`` together
    with ``self.Gamma``, and its result is returned unchanged.

    Args:
        user: identifier of the user to recommend to.
        u2s_v: mapping from user to the songs that user has listened to.
            A user missing from the mapping is treated as having no
            history and gets the songs in ``self.all_songs`` order.

    Returns:
        Whatever ``self.GetStochasticRec`` returns for the candidate lists.
    """
    known_user = user in u2s_v
    # Looking up u2s_v[user] unconditionally raised KeyError for unknown
    # users; an empty history simply disables the filter for them.
    listened = u2s_v[user] if known_user else ()
    songs_sorted = []
    for p in self.predictors:
        if known_user:
            ssongs = MSD_util.sort_dict_dec(p.Score(listened, self.all_songs))
        else:
            ssongs = list(self.all_songs)
        cleaned_songs = []
        for x in ssongs:
            if len(cleaned_songs) >= self.tau:
                break  # tau songs per predictor are enough
            if x not in listened:
                cleaned_songs.append(x)
        songs_sorted.append(cleaned_songs)
    return self.GetStochasticRec(songs_sorted, self.Gamma)
def RecommendToUser(self, user, u2s_v):
    """Build a recommendation for ``user`` from every predictor's ranking.

    Args:
        user: identifier of the user to recommend to.
        u2s_v: mapping from user to the songs that user has listened to.
            A user missing from the mapping is treated as having no
            history and gets the songs in ``self.all_songs`` order.

    Returns:
        The result of ``self.GetStochasticRec`` applied to the list of
        per-predictor candidate lists and ``self.Gamma``.
    """
    known_user = user in u2s_v
    # Indexing u2s_v[user] unconditionally raised KeyError for unknown users;
    # an empty history simply disables the "already listened" filter.
    listened = u2s_v[user] if known_user else ()
    songs_sorted = []
    for p in self.predictors:
        if known_user:
            # Score returns a dict (song from all_songs -> score based on the
            # user's history); sort_dict_dec orders it by decreasing score.
            ssongs = MSD_util.sort_dict_dec(p.Score(listened, self.all_songs))
        else:
            ssongs = list(self.all_songs)
        cleaned_songs = []
        for x in ssongs:
            if len(cleaned_songs) >= self.tau:
                break  # we only need tau songs for the recommendation
            if x not in listened:
                # never recommend a song the user has already listened to
                cleaned_songs.append(x)
        # songs_sorted holds one list of recommended songs per predictor
        songs_sorted.append(cleaned_songs)
    # Chooses a predictor based on the Gamma distribution and returns the
    # songs recommended by it (per the original author's note).
    return self.GetStochasticRec(songs_sorted, self.Gamma)
# --- Configuration -------------------------------------------------------
# These values used to come from the command line:
#   user_min, user_max, osfile = sys.argv[1:]   (user_min/user_max as int)
user_min = 10
user_max = 100
# Path of the output file.
osfile = "output.txt"

# Triplet files. Per the original notes: 48373586 training triplets whose
# users do not appear in the visible evaluation set, and 1450933 triplets
# used for recommendation evaluation on new users.
f_triplets_tr = "train_triplets.txt"
f_triplets_tev = "kaggle_visible_evaluation_triplets.txt"

print("user_min: %d , user_max: %d" % (user_min, user_max))
sys.stdout.flush()  # push buffered output to the terminal right away

# --- Users ---------------------------------------------------------------
print('loading users in %s' % "kaggle_users.txt")
sys.stdout.flush()
users_v = list(MSD_util.load_users("kaggle_users.txt"))

# --- Songs ordered by popularity ------------------------------------------
print('default ordering by popularity')
sys.stdout.flush()
# song_to_count builds a (song -> count) dictionary, which is then sorted
# in decreasing order.
songs_ordered = MSD_util.sort_dict_dec(
    MSD_util.song_to_count(f_triplets_tr)
)

# --- User indexes ----------------------------------------------------------
print("loading unique users indexes")
# Set of unique users found in the training triplets.
uu = MSD_util.unique_users(f_triplets_tr)
# Map every user id to its position in the iteration order of uu.
u2i = {user_id: index for index, user_id in enumerate(uu)}

# --- Song -> listeners -----------------------------------------------------
print('song to users on %s' % f_triplets_tr)
# Dictionary (song -> set of users who have listened to it).
s2u_tr = MSD_util.song_to_users(f_triplets_tr)

# Next step: replace the user ids stored in s2u_tr with their u2i indexes.
print("converting users to indexes")
# print() is called as a function so this section parses on Python 3 as
# well as Python 2, matching the other print calls in this file. Each call
# passes a single string, so the output is the same on both.
print(" user_min : %d, user_max :%d" % (user_min, user_max))
sys.stdout.flush()

# Triplets: both names point at the visible evaluation triplets here.
f_triplets_tr = "kaggle_visible_evaluation_triplets.txt"
f_triplets_tev = "kaggle_visible_evaluation_triplets.txt"

print('loading users in %s ' % "kaggle_users.txt")
sys.stdout.flush()
users_v = list(MSD_util.load_users("kaggle_users.txt"))

print(' default ordering by popularity')
sys.stdout.flush()
# Songs sorted by the result of song_to_count, via sort_dict_dec.
songs_ordered = MSD_util.sort_dict_dec(MSD_util.song_to_count(f_triplets_tr))

print('loading unique users indexes')
uu = MSD_util.unique_users(f_triplets_tr)
# Map every user to its position in the iteration order of uu.
u2i = {}
for i, u in enumerate(uu):
    u2i[u] = i

print(' song to users on %s ' % f_triplets_tr)
def generate_interaction(_tr, _va):
    """Collect the song catalogue and the user-song pairs of two datasets.

    Args:
        _tr: training data, forwarded to ``MSD_util.get_unique`` and
            ``MSD_util.get_user_song_pairs``.
        _va: validation data, forwarded to
            ``MSD_util.get_user_song_pairs``.

    Returns:
        A tuple ``(all_songs, train_pairs, valid_pairs)``: the second value
        returned by ``MSD_util.get_unique`` for ``_tr`` (presumably its
        unique songs -- confirm against MSD_util) followed by the two
        values returned by ``MSD_util.get_user_song_pairs``.
    """
    # print() as a function: the Python 2 print statement is a SyntaxError
    # on Python 3, and a single-string call prints the same on both.
    print("Creating user song-interaction lists")
    _, all_songs = MSD_util.get_unique(_tr, users=False, songs=True)
    train_pairs, valid_pairs = MSD_util.get_user_song_pairs(_tr, _va)
    return all_songs, train_pairs, valid_pairs
# --- Configuration -------------------------------------------------------
# The user_min / user_max bounds are currently disabled. They used to be:
#   user_min, user_max, osfile = sys.argv[1:]
#   user_min = 10   # int(user_min)
#   user_max = 100  # int(user_max)
#   print("user_min: %d , user_max: %d" % (user_min, user_max))
# Path of the output file.
osfile = "output.txt"

# Triplet files: training set, plus two test files (testV / testH).
# NOTE(review): the original size remarks (48373586 training triplets,
# 1450933 evaluation triplets) were written for other file names --
# confirm they still hold for these files.
f_triplets_tr = "train.txt"
f_triplets_tev = "testV.txt"
f_triplets_teh = "testH.txt"

sys.stdout.flush()  # push buffered output to the terminal right away

# --- Users ---------------------------------------------------------------
print('loading users in %s' % "kaggle_users.txt")
sys.stdout.flush()
users_v = list(MSD_util.load_users("kaggle_users.txt"))

# --- Songs ordered by popularity ------------------------------------------
print('default ordering by popularity')
sys.stdout.flush()
# song_to_count builds a (song -> count) dictionary, which is then sorted
# in decreasing order.
songs_ordered = MSD_util.sort_dict_dec(
    MSD_util.song_to_count(f_triplets_tr)
)

# --- User indexes ----------------------------------------------------------
print("loading unique users indexes")
# Set of unique users found in the training triplets.
uu = MSD_util.unique_users(f_triplets_tr)
# Map every user id to its position in the iteration order of uu.
u2i = {user_id: index for index, user_id in enumerate(uu)}

# --- Song -> listeners -----------------------------------------------------
print('song to users on %s' % f_triplets_tr)
# Dictionary (song -> set of users who have listened to it).
s2u_tr = MSD_util.song_to_users(f_triplets_tr)