Пример #1
0
    def Valid(self, T, users_te, u2s_v, u2s_h, n_batch=10):
        """Evaluate the recommender on T consecutive batches of test users.

        For each user of a batch, every predictor in ``self.predictors``
        produces a ranking, songs the user already has in ``u2s_v`` are
        dropped, and the first ``self.tau`` remaining songs are kept.  The
        per-predictor lists are combined by ``self.GetStochasticRec`` and
        the batch is scored with ``mAP``.  Progress is printed and mirrored,
        one message per line, into ``output_eval.txt``.

        Args:
            T: number of batches to evaluate.
            users_te: sequence of test users; batch ``t`` is the slice
                ``users_te[t*n_batch:(t+1)*n_batch]``.
            u2s_v: mapping user -> visible songs; users may be absent.
            u2s_h: passed unchanged to ``mAP`` (presumably the held-out
                songs per user -- confirm against ``mAP``).
            n_batch: number of users per batch.
        """
        # time.clock() was removed in Python 3.8; fall back to it only on
        # interpreters that do not provide perf_counter (Python 2).
        clock = getattr(time, "perf_counter", None) or time.clock
        ave_AP = 0.0
        with open('output_eval.txt', 'w') as f:

            def log(message):
                # Mirror a message to stdout and, newline-terminated, to
                # the log file so that entries do not run together.
                print(message)
                f.write(message + "\n")

            for t in range(T):
                rusers = users_te[t * n_batch:(t + 1) * n_batch]
                rec = []
                start = clock()
                for i, ru in enumerate(rusers):
                    known = ru in u2s_v
                    # Users without visible history have nothing to filter.
                    seen = u2s_v[ru] if known else ()
                    log("%d] scoring user %s with %d songs"
                        % (i, ru, len(seen)))
                    fl()
                    songs_sorted = []
                    for p in self.predictors:
                        if known:
                            ssongs = MSD_util.sort_dict_dec(
                                p.Score(seen, self.all_songs))
                        else:
                            # No history: use the default song ordering.
                            ssongs = list(self.all_songs)

                        cleaned_songs = []
                        for x in ssongs:
                            if len(cleaned_songs) >= self.tau:
                                break
                            if x not in seen:
                                cleaned_songs.append(x)

                        songs_sorted.append(cleaned_songs)

                    rec.append(
                        self.GetStochasticRec(songs_sorted, self.Gamma))

                cti = clock() - start
                log("Processed in %f secs" % cti)
                fl()

                # Evaluate the batch recommendations with MAP.
                map_cur = mAP(rusers, rec, u2s_h, self.tau)
                ave_AP += map_cur
                log("MAP(%d): %f (%f)" % (t, map_cur, ave_AP / (t + 1)))
                # Blank separator line on stdout (works on Python 2 and 3).
                print("")
                fl()

            log("Done!")
Пример #2
0
    def Valid(self, users_te, u2s_v, u2s_h, n_batch=10):
        """Evaluate the recommender on all of ``users_te`` in a single pass.

        For each user every predictor in ``self.predictors`` produces a
        ranking, songs already present in ``u2s_v`` for that user are
        dropped and the first ``self.tau`` remaining songs are kept.  The
        per-predictor lists are combined by ``self.GetStochasticRec`` and
        the whole user list is scored once with ``mAP``.

        Args:
            users_te: sequence of test users (evaluated as one batch).
            u2s_v: mapping user -> visible songs; users may be absent.
            u2s_h: passed unchanged to ``mAP`` (presumably the held-out
                songs per user -- confirm against ``mAP``).
            n_batch: unused; kept so existing callers keep working.
        """
        # time.clock() was removed in Python 3.8; fall back to it only on
        # interpreters that do not provide perf_counter (Python 2).
        clock = getattr(time, "perf_counter", None) or time.clock
        rusers = users_te
        rec = []
        start = clock()
        for i, ru in enumerate(rusers):
            known = ru in u2s_v
            # Users without visible history have nothing to filter.
            seen = u2s_v[ru] if known else ()
            print("%d] scoring user %s with %d songs" % (i, ru, len(seen)))
            fl()
            songs_sorted = []
            # The predictor loop must be active: the scoring below needs
            # ``p``, which was undefined while the loop was commented out.
            for p in self.predictors:
                if known:
                    ssongs = MSD_util.sort_dict_dec(
                        p.Score(seen, self.all_songs))
                else:
                    # No history: use the default song ordering.
                    ssongs = list(self.all_songs)

                cleaned_songs = []
                for x in ssongs:
                    if len(cleaned_songs) >= self.tau:
                        break
                    if x not in seen:
                        cleaned_songs.append(x)

                songs_sorted.append(cleaned_songs)

            # Append for every user so that rec[i] always belongs to
            # rusers[i] when both are handed to mAP.
            rec.append(self.GetStochasticRec(songs_sorted, self.Gamma))

        cti = clock() - start
        print("Processed in %f secs" % cti)
        fl()
        # Evaluate the recommendations with MAP.
        map_cur = mAP(rusers, rec, u2s_h, self.tau)
        # Only one batch is evaluated, so the running average is the batch
        # value itself (the label 1 is kept from the original output).
        print("MAP(%d): %f (%f)" % (1, map_cur, map_cur))
        fl()

        print("Done!")
    def Valid(self, T, users_te, u2s_v, u2s_h, n_batch, suffix):
        """Evaluate the recommender on T randomly drawn batches of users.

        Before each batch ``users_te`` is shuffled in place and the slice
        ``users_te[t*n_batch:(t+1)*n_batch]`` is evaluated.  For each user
        every predictor in ``self.predictors`` produces a ranking, songs
        already present in ``u2s_v`` are dropped and the first ``self.tau``
        remaining songs are kept; ``self.GetStochasticRec`` combines the
        per-predictor lists and ``mAP`` scores the batch.  One
        ``map,running_average`` line per batch is written to
        ``'valid' + suffix + '.txt'``.

        Args:
            T: number of batches to evaluate.
            users_te: list of test users; NOTE it is reshuffled in place
                on every iteration, so the caller's list order changes.
            u2s_v: mapping user -> visible songs; users may be absent.
            u2s_h: passed unchanged to ``mAP`` (presumably the held-out
                songs per user -- confirm against ``mAP``).
            n_batch: number of users per batch.
            suffix: inserted into the name of the result file.
        """
        # time.clock() was removed in Python 3.8; fall back to it only on
        # interpreters that do not provide perf_counter (Python 2).
        clock = getattr(time, "perf_counter", None) or time.clock
        ave_AP = 0.0
        # Opened lazily on the first batch (as before, no file is created
        # when T == 0) and always closed in the finally clause below.
        valid_loss_txt = None
        try:
            for t in range(T):
                random.shuffle(users_te)
                rusers = users_te[t * n_batch:(t + 1) * n_batch]
                rec = []
                start = clock()
                for i, ru in enumerate(rusers):
                    known = ru in u2s_v
                    # Users without visible history have nothing to filter.
                    seen = u2s_v[ru] if known else ()
                    print("%d] scoring user %s with %d songs"
                          % (i, ru, len(seen)))
                    fl()
                    songs_sorted = []
                    for p in self.predictors:
                        if known:
                            ssongs = MSD_util.sort_dict_dec(
                                p.Score(seen, self.all_songs))
                        else:
                            # No history: use the default song ordering.
                            ssongs = list(self.all_songs)

                        cleaned_songs = []
                        for x in ssongs:
                            if len(cleaned_songs) >= self.tau:
                                break
                            if x not in seen:
                                cleaned_songs.append(x)

                        songs_sorted.append(cleaned_songs)

                    rec.append(
                        self.GetStochasticRec(songs_sorted, self.Gamma))

                cti = clock() - start
                print("Processed in %f secs" % cti)
                fl()
                # Evaluate the batch recommendations with MAP.
                map_cur = mAP(rusers, rec, u2s_h, self.tau)
                ave_AP += map_cur
                running_avg = ave_AP / (t + 1)
                print("MAP(%d): %f (%f)" % (t + 1, map_cur, running_avg))
                fl()
                if valid_loss_txt is None:
                    valid_loss_txt = open('valid' + suffix + '.txt', 'w')
                valid_loss_txt.write("%s,%s\n" % (map_cur, running_avg))
        finally:
            if valid_loss_txt is not None:
                valid_loss_txt.close()
        print("Done!")
Пример #4
0
    def RecommendToUser(self, user, u2s_v):
        """Build the recommendation list for a single user.

        Every predictor in ``self.predictors`` contributes a list of at
        most ``self.tau`` songs that the user does not already have in
        ``u2s_v``; ``self.GetStochasticRec`` then combines those lists
        using ``self.Gamma``.

        Args:
            user: identifier of the user to recommend to.
            u2s_v: mapping user -> songs already known for that user.  The
                user may be absent, in which case the default ordering
                ``self.all_songs`` is used and nothing is filtered out.

        Returns:
            Whatever ``self.GetStochasticRec`` returns for the
            per-predictor candidate lists.
        """
        known = user in u2s_v
        # Guard the lookup: indexing u2s_v[user] for an unknown user would
        # raise KeyError even though that case is explicitly supported.
        seen = u2s_v[user] if known else ()

        songs_sorted = []
        for p in self.predictors:
            if known:
                ssongs = MSD_util.sort_dict_dec(
                    p.Score(seen, self.all_songs))
            else:
                ssongs = list(self.all_songs)

            cleaned_songs = []
            for x in ssongs:
                if len(cleaned_songs) >= self.tau:
                    break
                if x not in seen:
                    cleaned_songs.append(x)

            songs_sorted.append(cleaned_songs)

        return self.GetStochasticRec(songs_sorted, self.Gamma)
Пример #5
0
    def RecommendToUser(self, user, u2s_v):
        """Recommend songs to ``user`` by combining all predictors.

        Args:
            user: identifier of the user to recommend to.
            u2s_v: mapping user -> songs the user has already listened to.
                Users missing from the mapping are handled: they get the
                default ordering ``self.all_songs`` with nothing removed.

        Returns:
            The result of ``self.GetStochasticRec`` applied to the list of
            per-predictor candidate lists and ``self.Gamma``.
        """
        has_history = user in u2s_v
        # An empty history for unknown users avoids the KeyError that a
        # direct u2s_v[user] lookup would raise in the filter below.
        history = u2s_v[user] if has_history else ()

        # songs_sorted holds one list of recommended songs per predictor.
        songs_sorted = []
        for p in self.predictors:
            if has_history:
                # Score returns a dict (song from all_songs -> score based
                # on the user history); sort it by decreasing score.
                ssongs = MSD_util.sort_dict_dec(
                    p.Score(history, self.all_songs))
            else:
                ssongs = list(self.all_songs)

            cleaned_songs = []
            for x in ssongs:
                if len(cleaned_songs) >= self.tau:
                    break  # only tau songs are needed per recommendation
                if x not in history:
                    # never recommend a song the user already listened to
                    cleaned_songs.append(x)

            songs_sorted.append(cleaned_songs)

        # Chooses a predictor based on the Gamma distribution and returns
        # the list of songs recommended by the chosen predictor.
        return self.GetStochasticRec(songs_sorted, self.Gamma)
Пример #6
0
    def RecommendToUser(self, user, u2s_v):
        """Produce the combined recommendation for one user.

        For each predictor in ``self.predictors`` a candidate list of at
        most ``self.tau`` songs is built, skipping songs recorded for the
        user in ``u2s_v``.  The candidate lists are handed to
        ``self.GetStochasticRec`` together with ``self.Gamma``.

        Args:
            user: identifier of the user to recommend to.
            u2s_v: mapping user -> songs already known for that user; a
                user that is not a key is treated as having no songs.

        Returns:
            The value returned by ``self.GetStochasticRec``.
        """
        if user in u2s_v:
            owned = u2s_v[user]

            def ranked_songs(predictor):
                # Predictor scores ordered by MSD_util.sort_dict_dec.
                return MSD_util.sort_dict_dec(
                    predictor.Score(owned, self.all_songs))
        else:
            # Unknown user: no KeyError, nothing to exclude, and every
            # predictor falls back to the default ordering.
            owned = ()

            def ranked_songs(predictor):
                return list(self.all_songs)

        songs_sorted = []
        for p in self.predictors:
            cleaned_songs = []
            for x in ranked_songs(p):
                if len(cleaned_songs) >= self.tau:
                    break
                if x not in owned:
                    cleaned_songs.append(x)
            songs_sorted.append(cleaned_songs)

        return self.GetStochasticRec(songs_sorted, self.Gamma)
Пример #7
0
# Script excerpt: loads the training data and builds the lookup structures
# used for recommending songs.
# NOTE(review): user_min / user_max and the sys / MSD_util imports are not
# defined in this excerpt; presumably they are set up earlier in the file.

# Path of the output file (the original comment also mentions
# kaggle_songs.txt; its role is not visible in this excerpt).
osfile = "output.txt"
print ("user_min: %d , user_max: %d"%(user_min,user_max))
# Flush the stdout buffer so the message reaches the terminal immediately.
sys.stdout.flush()

# TRIPLETS
# 48373586 triplets for training, with users disjoint from kaggle_visible.
f_triplets_tr="train_triplets.txt"
# 1450933 triplets for recommendation evaluation, with exclusively new users.
f_triplets_tev="kaggle_visible_evaluation_triplets.txt"

print ('loading users in %s'%"kaggle_users.txt")
sys.stdout.flush()
users_v=list(MSD_util.load_users("kaggle_users.txt"))

print ('default ordering by popularity')
sys.stdout.flush()
# song_to_count creates a dictionary (song, count); sort_dict_dec then
# orders it by decreasing count.
songs_ordered=MSD_util.sort_dict_dec(MSD_util.song_to_count(f_triplets_tr))

print  ("loading unique users indexes")
# unique_users returns a set of the unique users in the training triplets.
uu = MSD_util.unique_users(f_triplets_tr)
# u2i maps each user id to its enumeration index in uu.
u2i = {}
for i,u in enumerate(uu):
    u2i[u]=i

print ('song to users on %s'%f_triplets_tr)
# Dict: song -> set of users who have listened to that song.
s2u_tr=MSD_util.song_to_users(f_triplets_tr)

# Convert the user ids stored in s2u_tr into their indexes from u2i.
print ("converting users to indexes")
for s in s2u_tr:
    s_set = set()
    for u in s2u_tr[s]:
        s_set.add(u2i[u])
    # NOTE(review): in this excerpt s_set is never written back to
    # s2u_tr[s]; presumably the original file continues with that
    # assignment -- confirm.
Пример #8
0

# Script excerpt (Python 2 print statements): loads users and training
# triplets and builds the user-index lookup.
# NOTE(review): sys and MSD_util are not imported within this excerpt.

# triplets
# NOTE(review): the training and evaluation paths point to the same file
# here -- confirm that this is intended.

f_triplets_tr = "kaggle_visible_evaluation_triplets.txt"
f_triplets_tev ="kaggle_visible_evaluation_triplets.txt"

print 'loading users in %s ' % "kaggle_users.txt"

sys.stdout.flush()
users_v = list(MSD_util.load_users("kaggle_users.txt"))


print ' default ordering by popularity'
sys.stdout.flush()
# Songs ordered via sort_dict_dec over the counts from song_to_count.
songs_ordered=MSD_util.sort_dict_dec(MSD_util.song_to_count(f_triplets_tr))


print 'loading unique users indexes'
uu = MSD_util.unique_users(f_triplets_tr)
# u2i maps each user to its enumeration index in uu.
u2i={}
for i,u in enumerate(uu):
    u2i[u]=i


print ' song to users on %s ' % f_triplets_tr
s2u_tr = MSD_util.song_to_users(f_triplets_tr)

print ' converting users to indexes'
# NOTE(review): the conversion loop is cut off in this excerpt; only the
# creation of the empty per-song set is visible.
for s in s2u_tr:
    s_set = set()
import sys
import MSD_util, MSD_rec

# paths to data
f_triplets_tr = "../data/train_data.txt"
f_triplets_vv = "../data/valid_visible.txt"
f_triplets_vp = "../data/valid_predict.txt"

# parameters
# Number of songs recommended to each user.
_tau = 500

print('default ordering by popularity')
sys.stdout.flush()
songs_ordered = MSD_util.sort_dict_dec(
    MSD_util.song_to_count(f_triplets_tr, binary=False))

print('user to songs on %s' % f_triplets_vv)
u2s_vv = MSD_util.user_to_songs(f_triplets_vv)
print('user to songs on %s' % f_triplets_vp)
u2s_vp = MSD_util.user_to_songs(f_triplets_vp)

# Recommend the top _tau most popular songs (extremely unpersonalized :|).
# The ranking is walked in popularity order so that the recommendation
# list itself stays ordered by popularity (a set difference would discard
# the order), and every song the user already has in the visible data is
# skipped, including those beyond the first _tau positions.
all_recs = []
for u in u2s_vv:
    listened = u2s_vv[u]
    recs4u = []
    for song in songs_ordered:
        if len(recs4u) >= _tau:
            break
        if song not in listened:
            recs4u.append(song)
    all_recs.append(recs4u)
# Script excerpt (Python 2 print statements): loads validation users and
# training triplets and builds the user-index lookup.
# NOTE(review): user_min / user_max are not defined within this excerpt;
# presumably they are set earlier in the original file.
print "user_min: %d , user_max: %d" % (user_min, user_max)
sys.stdout.flush()

# TRIPLETS
# f_triplets_tr is used below for popularity counts and the song->users map;
# tev / teh are the visible and hidden halves of the validation triplets
# (going by the file names -- confirm).
f_triplets_tr = "kaggle_visible_evaluation_triplets.txt"
f_triplets_tev = "year1_valid_triplets_visible.txt"
f_triplets_teh = "year1_valid_triplets_hidden.txt"

# NOTE(review): the message names kaggle_users.txt but the users are
# actually loaded from user_valid.txt -- confirm which one is intended.
print 'loading users in %s' % "kaggle_users.txt"
sys.stdout.flush()
users_v = list(MSD_util.load_users("user_valid.txt"))

print 'default ordering by popularity'
sys.stdout.flush()
# Songs ordered via sort_dict_dec over the counts from song_to_count.
songs_ordered = MSD_util.sort_dict_dec(MSD_util.song_to_count(f_triplets_tr))

print "loading unique users indexes"
uu = MSD_util.unique_users(f_triplets_tr)
# u2i maps each user to its enumeration index in uu.
u2i = {}
for i, u in enumerate(uu):
    u2i[u] = i

print 'song to users on %s' % f_triplets_tr
s2u_tr = MSD_util.song_to_users(f_triplets_tr)

print "converting users to indexes"
for s in s2u_tr:
    s_set = set()
    for u in s2u_tr[s]:
        s_set.add(u2i[u])
    # NOTE(review): in this excerpt s_set is never written back to
    # s2u_tr[s]; presumably the original file continues with that
    # assignment -- confirm.
Пример #11
0
def main(argv):
    """Generate song recommendations for a slice of the Kaggle users.

    Args:
        argv: sequence ``[user_min, user_max, result_file]``.  The first two
            are converted with ``int`` and used as slice bounds into the
            user list; the third is the output file name.  With fewer than
            three entries a hint is printed and the defaults
            ``1, 110000, "resultfull.txt"`` are used instead.
            NOTE(review): the indexes start at 0, so this presumably
            expects ``sys.argv[1:]`` rather than ``sys.argv`` -- confirm
            against the caller.

    Raises:
        ValueError: if user_min or user_max cannot be converted to int.
    """
    if len(argv) < 3:
        print(
            "Need more arguments, Example:MSD_subm_rec.py user_min user_max resultFile.txt"
        )
        # Deliberately continue with defaults instead of exiting.
        user_min, user_max, osfile = 1, 110000, "resultfull.txt"
    else:
        user_min, user_max, osfile = argv[0], argv[1], argv[2]

    user_min = int(user_min)
    user_max = int(user_max)

    print("user_min: %d , user_max: %d" % (user_min, user_max))
    sys.stdout.flush()

    # TRIPLETS
    f_triplets_tr = "train_triplets.txt"
    f_triplets_tev = "kaggle_visible_evaluation_triplets.txt"

    print('loading users in %s' % "kaggle_users.txt")
    sys.stdout.flush()
    users_v = list(MSD_util.load_users("kaggle_users.txt"))

    print('default ordering by popularity')
    sys.stdout.flush()
    songs_ordered = MSD_util.sort_dict_dec(
        MSD_util.song_to_count(f_triplets_tr))

    print("loading unique users indexes")
    uu = MSD_util.unique_users(f_triplets_tr)
    # Map every user to its enumeration index in uu.
    u2i = dict((u, i) for i, u in enumerate(uu))

    print('song to users on %s' % f_triplets_tr)
    s2u_tr = MSD_util.song_to_users(f_triplets_tr)

    print("converting users to indexes")
    # Replace each song's users by their indexes.  Only values of existing
    # keys are reassigned, so updating while iterating is safe.
    for s in s2u_tr:
        s2u_tr[s] = set(u2i[u] for u in s2u_tr[s])

    # The lookup table is no longer needed once the conversion is done.
    del u2i

    print('user to songs on %s' % f_triplets_tev)
    u2s_v = MSD_util.user_to_songs(f_triplets_tev)

    print('Creating predictor..')

    # Predictor parameters handed to MSD_rec.PredSI.
    _A = 0.15
    _Q = 3
    ### calibrated
    ### pr=MSD_rec.PredSIc(s2u_tr, _A, _Q, "songs_scores.txt")

    ### uncalibrated
    pr = MSD_rec.PredSI(s2u_tr, _A, _Q)

    print('Creating recommender..')
    cp = MSD_rec.SReco(songs_ordered)
    cp.Add(pr)
    # One predictor only, so it receives the full weight.
    cp.Gamma = [1.0]

    r = cp.RecommendToUsers(users_v[user_min:user_max], u2s_v)
    MSD_util.save_recommendations(r, "kaggle_songs.txt", osfile)