def main():
    """Sweep ``user_k`` for the tag-based user-CF recommender and report scores.

    Command line: ``<set_level> <train_prob> <top_n>``.

    Builds the train/test dataset, builds the user tag distribution and the
    user similarity (``top_user_k=1000``), then for each ``user_k`` in
    5, 10, 20, ..., 100 runs the recommender, scores it against the test
    data, and prints the result.  The score records with the highest
    f_score, precision and recall are printed at the end.
    """
    set_level = sys.argv[1]
    train_prob = sys.argv[2]
    top_n = int(sys.argv[3])

    # File locations, all keyed by (set_level, train_prob).
    run_key = (set_level, train_prob)
    # Alternative tag source: './song_dataset/mid_data/song_tag_dist_with_singer.json'
    item_tag_file = './song_dataset/mid_data/song_tag_distribution.json'
    user_tag_file = './song_dataset/mid_data/user_tag_distribution_%s_%s.json' % run_key
    user_sim_file = './song_dataset/mid_data/user_similarity_withTag_%s_%s.json' % run_key
    file_template = './song_dataset/user_dataset_%s_%s_%s'  # set_num, type, train_prob
    train_file = file_template % (set_level, 'train', train_prob)
    test_file = file_template % (set_level, 'test', train_prob)

    # Dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file, test_file)
    logging.info("Build dataset cost:%s" % (dataset.cost_time))
    print("DataForTrain: %s" % train_file)
    print("DataForTest: %s" % test_file)
    print("Dataset train_set info: %s" % dataset.get_train_info())
    print("Dataset test_set info: %s" % dataset.get_test_info())

    # Best record seen so far for each metric, seeded with a zero baseline.
    metrics = ('f_score', 'precision', 'recall')
    best = {metric: {metric: 0} for metric in metrics}

    # Recommender
    recommender = UserTagCF()
    recommender.build_userTagDistribution(dataset.train_data, item_tag_file,
                                          user_tag_file)
    recommender.build_user_similarity(dataset.train_data, user_sim_file,
                                      top_user_k=1000)

    # Sweep the neighbourhood size.
    for user_k in [5] + list(range(10, 101, 10)):
        recommender.recommend(dataset.train_data, user_k=user_k, top_n=top_n)
        logging.info("Train_prob:%s User_k:%s Top_n:%s cost:%s",
                     train_prob, user_k, top_n, recommender.cost_time)
        scores = recommender.score(dataset.test_data, len(dataset.all_songs))
        print("User_k:%s\tTop_n:%s\tScores:%s" % (user_k, top_n, scores))

        # A winning score dict is tagged with the parameters that produced it
        # and stored as-is (the same dict may be the best for several metrics).
        for metric in metrics:
            if scores[metric] > best[metric][metric]:
                scores['user_k'] = user_k
                scores['top_n'] = top_n
                best[metric] = scores

    print("Best_F_Score: %s" % best['f_score'])
    print("Best_Precision: %s" % best['precision'])
    print("Best_Recall: %s" % best['recall'])
def main():
    """Sweep ``user_k`` for the plain user-CF recommender and report scores.

    Command line: ``<set_level> <train_prob> <top_n>``.

    Builds the train/test dataset and the user similarity
    (``top_user_k=1000``), then for each ``user_k`` in 5, 10, 20, ..., 100
    runs the recommender, scores it against the test data, and prints the
    result.  The score records with the highest f_score, precision and
    recall are printed at the end.
    """
    set_level = sys.argv[1]
    train_prob = sys.argv[2]
    top_n = int(sys.argv[3])

    # File locations
    file_template = './song_dataset/user_dataset_%s_%s_%s'  # set_num, type, train_prob
    train_file = file_template % (set_level, 'train', train_prob)
    test_file = file_template % (set_level, 'test', train_prob)
    # User-user similarity matrix
    user_sim_file = './song_dataset/mid_data/user_sim_%s_%s.json' % (
        set_level, train_prob)

    # Dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file, test_file)
    logging.info("Build dataset cost:%s" % (dataset.cost_time))
    print("DataForTrain: %s" % train_file)
    print("DataForTest: %s" % test_file)
    print("Dataset train_set info: %s" % dataset.get_train_info())
    print("Dataset test_set info: %s" % dataset.get_test_info())

    # Best record seen so far for each metric, seeded with a zero baseline.
    metrics = ('f_score', 'precision', 'recall')
    best = {metric: {metric: 0} for metric in metrics}

    # Recommender; top_user_k is the number of most-similar users kept in the file.
    recommender = UserCF()
    recommender.build_user_similarity(dataset.train_data, user_sim_file,
                                      top_user_k=1000)

    # Sweep the neighbourhood size.
    for user_k in [5] + list(range(10, 101, 10)):
        recommender.recommend(dataset.train_data, user_k=user_k, top_n=top_n)
        logging.info("Train_prob:%s User_k:%s Top_n:%s cost:%s",
                     train_prob, user_k, top_n, recommender.cost_time)
        scores = recommender.score(dataset.test_data, len(dataset.all_songs))
        print("User_k:%s\tTop_n:%s\tScores:%s" % (user_k, top_n, scores))

        # A winning score dict is tagged with the parameters that produced it
        # and stored as-is (the same dict may be the best for several metrics).
        for metric in metrics:
            if scores[metric] > best[metric][metric]:
                scores['user_k'] = user_k
                scores['top_n'] = top_n
                best[metric] = scores

    print("Best_F_Score: %s" % best['f_score'])
    print("Best_Precision: %s" % best['precision'])
    print("Best_Recall: %s" % best['recall'])
def main():
    """Evaluate the popularity recommender for several list lengths.

    Command line: ``<set_level> <train_prob> <e_type>`` where ``e_type`` is
    the experiment type; ``'playlist'`` selects the playlist dataset and any
    other value selects the song dataset.

    For each ``top_n`` in 1, 50, 100, 150, 200 the recommender is run and
    scored against the test data, and the result is printed.  The score
    records with the highest f_score, precision and recall are printed at
    the end.
    """
    set_level = sys.argv[1]
    train_prob = sys.argv[2]
    e_type = sys.argv[3]  # Experiment type: song or playlist

    # Template fields: set_level, type, train_prob
    if e_type == 'playlist':
        file_template = './pl_dataset/user_playlist_%s_%s_%s'
    else:
        file_template = './song_dataset/user_dataset_%s_%s_%s'
    train_file = file_template % (set_level, 'train', train_prob)
    test_file = file_template % (set_level, 'test', train_prob)

    # Dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file, test_file)
    logging.info("Build dataset cost:%s", dataset.cost_time)
    print("DataForTrain: %s" % train_file)
    print("DataForTest: %s" % test_file)
    print("Dataset train_set info: %s" % dataset.get_train_info())
    print("Dataset test_set info: %s" % dataset.get_test_info())

    # Best record seen so far for each metric, seeded with a zero baseline.
    metrics = ('f_score', 'precision', 'recall')
    best = {metric: {metric: 0} for metric in metrics}

    # Recommender
    recommender = Popularity()
    for top_n in [1, 50, 100, 150, 200]:
        recommender.recommend(dataset.train_data, top_n)
        logging.info("Train_prob:%s Recommend Top_n:%s cost:%s",
                     train_prob, top_n, recommender.cost_time)
        scores = recommender.score(dataset.test_data)
        print("Top_n:%s\tScores:%s" % (top_n, scores))

        # A winning score dict is tagged with its top_n and stored as-is
        # (the same dict may be the best for several metrics).
        for metric in metrics:
            if scores[metric] > best[metric][metric]:
                scores['top_n'] = top_n
                best[metric] = scores

    print("Best_F_Score: %s" % best['f_score'])
    print("Best_Precision: %s" % best['precision'])
    print("Best_Recall: %s" % best['recall'])
def main():
    """Evaluate the popularity recommender for several list lengths.

    Command line: ``<set_level> <train_prob> <e_type>`` where ``e_type`` is
    the experiment type; ``'playlist'`` selects the playlist dataset and any
    other value selects the song dataset.

    For each ``top_n`` in 1, 50, 100, 150, 200 the recommender is run and
    scored against the test data, and the result is printed.  The score
    records with the highest f_score, precision and recall are printed at
    the end.
    """
    set_level = sys.argv[1]
    train_prob = sys.argv[2]
    e_type = sys.argv[3]  # Experiment type: song or playlist

    # Template fields: set_level, type, train_prob
    if e_type == 'playlist':
        file_template = './pl_dataset/user_playlist_%s_%s_%s'
    else:
        file_template = './song_dataset/user_dataset_%s_%s_%s'
    train_file = file_template % (set_level, 'train', train_prob)
    test_file = file_template % (set_level, 'test', train_prob)

    # Dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file, test_file)
    logging.info("Build dataset cost:%s", dataset.cost_time)
    print("DataForTrain: %s" % train_file)
    print("DataForTest: %s" % test_file)
    print("Dataset train_set info: %s" % dataset.get_train_info())
    print("Dataset test_set info: %s" % dataset.get_test_info())

    # Best record seen so far for each metric, seeded with a zero baseline.
    metrics = ('f_score', 'precision', 'recall')
    best = {metric: {metric: 0} for metric in metrics}

    # Recommender
    recommender = Popularity()
    for top_n in [1, 50, 100, 150, 200]:
        recommender.recommend(dataset.train_data, top_n)
        logging.info("Train_prob:%s Recommend Top_n:%s cost:%s",
                     train_prob, top_n, recommender.cost_time)
        scores = recommender.score(dataset.test_data)
        print("Top_n:%s\tScores:%s" % (top_n, scores))

        # A winning score dict is tagged with its top_n and stored as-is
        # (the same dict may be the best for several metrics).
        for metric in metrics:
            if scores[metric] > best[metric][metric]:
                scores['top_n'] = top_n
                best[metric] = scores

    print("Best_F_Score: %s" % best['f_score'])
    print("Best_Precision: %s" % best['precision'])
    print("Best_Recall: %s" % best['recall'])
def main():
    """Sweep ``item_k`` for the item-CF recommender and report scores.

    Command line: ``<set_level> <train_prob> <top_n> <e_type>`` where
    ``e_type`` is the experiment type; ``'playlist'`` selects the playlist
    dataset and any other value selects the song dataset.

    The item similarity matrix is loaded from ``item_sim_file`` when that
    file exists; otherwise ``build_item_similarity`` is called with the
    training data and that path.  For each ``item_k`` in 20..99 the
    recommender is run and scored, and the result is printed.  The score
    records with the highest f_score, precision and recall are printed at
    the end.
    """
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    top_n = int(args[3])
    e_type = args[4]  # Experiment type: song or playlist

    # File locations
    if e_type == 'playlist':
        file_template = './pl_dataset/user_playlist_%s_%s_%s'  # set_num, type, train_prob
        item_sim_file = './pl_dataset/mid_data/item_similarity_%s_%s.json' % (
            set_level, train_prob)
    else:
        file_template = './song_dataset/user_dataset_%s_%s_%s'  # set_num, type, train_prob
        item_sim_file = './song_dataset/mid_data/item_similarity_%s_%s.json' % (
            set_level, train_prob)
    train_file = file_template % (set_level, 'train', train_prob)
    test_file = file_template % (set_level, 'test', train_prob)

    # Dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file, test_file)
    logging.info("Build dataset cost:%s" % (dataset.cost_time))
    print("DataForTrain: %s" % train_file)
    print("DataForTest: %s" % test_file)
    print("Dataset train_set info: %s" % dataset.get_train_info())
    print("Dataset test_set info: %s" % dataset.get_test_info())

    # Best record seen so far for each metric, seeded with a zero baseline.
    metrics = ('f_score', 'precision', 'recall')
    best = {metric: {metric: 0} for metric in metrics}

    # Recommender: reuse the similarity file when present, else build it.
    itemCF_recommender = ItemCF()
    if os.path.exists(item_sim_file):
        logging.info("File %s exists, loading item similarity matrix",
                     item_sim_file)
        itemCF_recommender.load_item_similarity(item_sim_file)
        logging.info("Load item_similarity cost: %s" %
                     (itemCF_recommender.cost_time))
    else:
        logging.info("File %s doesn't exist, building item similarity matrix",
                     item_sim_file)
        itemCF_recommender.build_item_similarity(dataset.train_data,
                                                 item_sim_file)
        # This branch builds the matrix, so label the timing as a build
        # (the message previously said "Load", copied from the branch above).
        logging.info("Build item_similarity cost: %s" %
                     (itemCF_recommender.cost_time))

    # Sweep the number of neighbouring items.
    for item_k in range(20, 100):
        itemCF_recommender.recommend(dataset.train_data, item_k=item_k,
                                     top_n=top_n)
        logging.info("Train_prob:%s Item_k:%s Top_n:%s Cost:%s",
                     train_prob, item_k, top_n, itemCF_recommender.cost_time)
        scores = itemCF_recommender.score(dataset.test_data)
        print("Item_k:%s\tTop_n:%s\tScores:%s" % (item_k, top_n, scores))

        # A winning score dict is tagged with the parameters that produced it
        # and stored as-is (the same dict may be the best for several metrics).
        for metric in metrics:
            if scores[metric] > best[metric][metric]:
                scores['item_k'] = item_k
                scores['top_n'] = top_n
                best[metric] = scores

    print("Best_F_Score: %s" % best['f_score'])
    print("Best_Precision: %s" % best['precision'])
    print("Best_Recall: %s" % best['recall'])
def main():
    """Sweep ``user_k`` for the hybrid (tag + LDA) recommender and report scores.

    Command line:
    ``<set_level> <train_prob> <topic_num> <top_n> <recommend_job>`` where
    ``recommend_job`` is one of ``mix_sim``, ``mix_sim_reorder``,
    ``mix_result`` or ``mix_result_reorder``.

    * ``mix_sim*`` jobs call ``hybird_user_sim`` (``theta=0.45``) once and then
      ``recommend`` per ``user_k``, with ``reorder=1`` for the ``_reorder``
      variant and ``reorder=0`` otherwise.
    * ``mix_result*`` jobs load the tag and LDA user similarities separately
      (``norm=1``) and then call ``hybird_recommend_result`` or, for the
      ``_reorder`` variant, ``hybird_result_withReorder`` per ``user_k``.

    Tag distributions are loaded only for the ``_reorder`` jobs; the other
    jobs pass empty dicts.  Logging goes to a per-run file under ``./log``.

    Raises:
        ValueError: if ``recommend_job`` is not one of the four known jobs.
            Previously such a value skipped both the similarity setup and the
            recommendation step while still scoring and printing results.
    """
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    topic_num = int(args[3])
    top_n = int(args[4])
    recommend_job = args[5]

    # Reject unknown jobs before any log file is opened or data is loaded.
    known_jobs = ('mix_sim', 'mix_sim_reorder',
                  'mix_result', 'mix_result_reorder')
    if recommend_job not in known_jobs:
        raise ValueError("Unknown recommend_job %r; expected one of: %s"
                         % (recommend_job, ', '.join(known_jobs)))
    mixes_similarity = recommend_job in ('mix_sim', 'mix_sim_reorder')
    reorders = recommend_job in ('mix_result_reorder', 'mix_sim_reorder')

    # Log config
    log_file = './log/hybirdModel_%s_%s_%s_%s_%s.log' % (
        set_level, train_prob, topic_num, recommend_job, top_n)
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s %(funcName)s %(lineno)d %(message)s',
        filename=log_file,
        filemode='w')

    # File locations
    file_template = './song_dataset/user_dataset_%s_%s_%s'  # set_level, type, train_prob
    userTag_sim_file = './song_dataset/mid_data/user_similarity_withTag_%s_%s.json' % (
        set_level, train_prob)
    userLDA_sim_file = './song_dataset/mid_data/user_sim_with_lda_%s_%s_%s.json' % (
        set_level, train_prob, topic_num)
    train_file = file_template % (set_level, 'train', train_prob)
    test_file = file_template % (set_level, 'test', train_prob)

    # Dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file, test_file)
    logging.info("Build dataset cost:%s" % (dataset.cost_time))
    print("DataForTrain: %s" % train_file)
    print("DataForTest: %s" % test_file)
    print("Dataset train_set info: %s" % dataset.get_train_info())
    print("Dataset test_set info: %s" % dataset.get_test_info())

    # Best record seen so far for each metric, seeded with a zero baseline.
    metrics = ('f_score', 'precision', 'recall')
    best = {metric: {metric: 0} for metric in metrics}

    # Tag distributions are only loaded for the reordering jobs.
    items_tag_dict = {}
    users_tag_dict = {}
    if reorders:
        items_tag_dict = load_tag_distribution(
            './song_dataset/mid_data/song_tag_distribution.json')
        user_tag_file = './song_dataset/mid_data/user_tag_distribution_%s_%s.json' % (
            set_level, train_prob)
        users_tag_dict = load_tag_distribution(user_tag_file)

    # Hybrid model setup
    recommender = HybirdModel()
    if mixes_similarity:
        recommender.hybird_user_sim(dataset.train_data, userTag_sim_file,
                                    userLDA_sim_file, theta=0.45)
    else:
        recommender.userTag.load_user_similarity(userTag_sim_file, norm=1)
        recommender.userLda.load_user_similarity(userLDA_sim_file, norm=1)

    # Sweep the neighbourhood size.
    for user_k in [5] + list(range(10, 101, 10)):
        if mixes_similarity:
            recommender.recommend(dataset.train_data, users_tag_dict,
                                  items_tag_dict, user_k, top_n,
                                  reorder=1 if reorders else 0)
        elif reorders:
            recommender.hybird_result_withReorder(dataset.train_data,
                                                  users_tag_dict,
                                                  items_tag_dict,
                                                  user_k, top_n)
        else:
            recommender.hybird_recommend_result(dataset.train_data,
                                                user_k, top_n)
        logging.info("Train_prob:%s User_k:%s Top_n:%s cost:%s",
                     train_prob, user_k, top_n, recommender.cost_time)
        scores = recommender.score(dataset.test_data, len(dataset.all_songs))
        print("User_k:%s\tTop_n:%s\tScores:%s" % (user_k, top_n, scores))

        # A winning score dict is tagged with the parameters that produced it
        # and stored as-is (the same dict may be the best for several metrics).
        for metric in metrics:
            if scores[metric] > best[metric][metric]:
                scores['user_k'] = user_k
                scores['top_n'] = top_n
                best[metric] = scores

    print("Best_F_Score: %s" % best['f_score'])
    print("Best_Precision: %s" % best['precision'])
    print("Best_Recall: %s" % best['recall'])
def main():
    """Sweep ``item_k`` for the item-CF recommender and report scores.

    Command line: ``<set_level> <train_prob> <top_n> <e_type>`` where
    ``e_type`` is the experiment type; ``'playlist'`` selects the playlist
    dataset and any other value selects the song dataset.

    The item similarity matrix is loaded from ``item_sim_file`` when that
    file exists; otherwise ``build_item_similarity`` is called with the
    training data and that path.  For each ``item_k`` in 20..99 the
    recommender is run and scored, and the result is printed.  The score
    records with the highest f_score, precision and recall are printed at
    the end.
    """
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    top_n = int(args[3])
    e_type = args[4]  # Experiment type: song or playlist

    # File locations
    if e_type == 'playlist':
        file_template = './pl_dataset/user_playlist_%s_%s_%s'  # set_num, type, train_prob
        item_sim_file = './pl_dataset/mid_data/item_similarity_%s_%s.json' % (
            set_level, train_prob)
    else:
        file_template = './song_dataset/user_dataset_%s_%s_%s'  # set_num, type, train_prob
        item_sim_file = './song_dataset/mid_data/item_similarity_%s_%s.json' % (
            set_level, train_prob)
    train_file = file_template % (set_level, 'train', train_prob)
    test_file = file_template % (set_level, 'test', train_prob)

    # Dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file, test_file)
    logging.info("Build dataset cost:%s" % (dataset.cost_time))
    print("DataForTrain: %s" % train_file)
    print("DataForTest: %s" % test_file)
    print("Dataset train_set info: %s" % dataset.get_train_info())
    print("Dataset test_set info: %s" % dataset.get_test_info())

    # Best record seen so far for each metric, seeded with a zero baseline.
    metrics = ('f_score', 'precision', 'recall')
    best = {metric: {metric: 0} for metric in metrics}

    # Recommender: reuse the similarity file when present, else build it.
    itemCF_recommender = ItemCF()
    if os.path.exists(item_sim_file):
        logging.info("File %s exists, loading item similarity matrix",
                     item_sim_file)
        itemCF_recommender.load_item_similarity(item_sim_file)
        logging.info("Load item_similarity cost: %s" %
                     (itemCF_recommender.cost_time))
    else:
        logging.info("File %s doesn't exist, building item similarity matrix",
                     item_sim_file)
        itemCF_recommender.build_item_similarity(dataset.train_data,
                                                 item_sim_file)
        # This branch builds the matrix, so label the timing as a build
        # (the message previously said "Load", copied from the branch above).
        logging.info("Build item_similarity cost: %s" %
                     (itemCF_recommender.cost_time))

    # Sweep the number of neighbouring items.
    for item_k in range(20, 100):
        itemCF_recommender.recommend(dataset.train_data, item_k=item_k,
                                     top_n=top_n)
        logging.info("Train_prob:%s Item_k:%s Top_n:%s Cost:%s",
                     train_prob, item_k, top_n, itemCF_recommender.cost_time)
        scores = itemCF_recommender.score(dataset.test_data)
        print("Item_k:%s\tTop_n:%s\tScores:%s" % (item_k, top_n, scores))

        # A winning score dict is tagged with the parameters that produced it
        # and stored as-is (the same dict may be the best for several metrics).
        for metric in metrics:
            if scores[metric] > best[metric][metric]:
                scores['item_k'] = item_k
                scores['top_n'] = top_n
                best[metric] = scores

    print("Best_F_Score: %s" % best['f_score'])
    print("Best_Precision: %s" % best['precision'])
    print("Best_Recall: %s" % best['recall'])
def main():
    """Sweep ``user_k`` for the LDA-based user recommender and report scores.

    Command line: ``<set_level> <train_prob> <topic_num> <top_n> <e_type>``
    where ``e_type`` is ``song`` or ``playlist``; ``'playlist'`` selects the
    playlist dataset and any other value selects the song dataset.

    Logging is written to a per-run file under ``./log``.  The user
    similarity is built with ``top_user_k=1000``; then for each ``user_k`` in
    5, 10, 20, ..., 100 the recommender is run and scored against the test
    data, and the result is printed.  The score records with the highest
    f_score, precision and recall are printed at the end.
    """
    set_level = sys.argv[1]
    train_prob = sys.argv[2]
    topic_num = int(sys.argv[3])
    top_n = int(sys.argv[4])
    e_type = sys.argv[5]  # e_type: song or playlist

    # Log config
    logfile = './log/userLDA_%s_%s_%s.log' % (set_level, train_prob, topic_num)
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s %(funcName)s %(lineno)d %(message)s',
        filename=logfile,
        filemode='w')

    # File locations (template fields: set_level, type, train_prob)
    lda_key = (set_level, train_prob, topic_num)
    if e_type == 'playlist':
        user_sim_file = './pl_dataset/mid_data/user_sim_with_lda_%s_%s_%s.json' % lda_key
        file_template = './pl_dataset/user_playlist_%s_%s_%s'
    else:
        user_sim_file = './song_dataset/mid_data/user_sim_with_lda_%s_%s_%s_new.json' % lda_key
        file_template = './song_dataset/user_dataset_%s_%s_%s'
    train_file = file_template % (set_level, 'train', train_prob)
    test_file = file_template % (set_level, 'test', train_prob)

    # Dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file, test_file)
    logging.info("Build dataset cost:%s" % (dataset.cost_time))
    print("DataForTrain: %s" % train_file)
    print("DataForTest: %s" % test_file)
    print("Dataset train_set info: %s" % dataset.get_train_info())
    print("Dataset test_set info: %s" % dataset.get_test_info())

    # Best record seen so far for each metric, seeded with a zero baseline.
    metrics = ('f_score', 'precision', 'recall')
    best = {metric: {metric: 0} for metric in metrics}

    # Recommender
    recommender = UserLDA()
    recommender.build_user_similarity(user_sim_file, dataset.train_data,
                                      topic_num=topic_num, top_user_k=1000)

    # Sweep the neighbourhood size.
    for user_k in [5] + list(range(10, 101, 10)):
        recommender.recommend(dataset.train_data, user_k=user_k, top_n=top_n)
        logging.info("Train_prob:%s User_k:%s Top_n:%s cost:%s",
                     train_prob, user_k, top_n, recommender.cost_time)
        scores = recommender.score(dataset.test_data, len(dataset.all_songs))
        print("User_k:%s\tTop_n:%s\tScores:%s" % (user_k, top_n, scores))

        # A winning score dict is tagged with the parameters that produced it
        # and stored as-is (the same dict may be the best for several metrics).
        for metric in metrics:
            if scores[metric] > best[metric][metric]:
                scores['user_k'] = user_k
                scores['top_n'] = top_n
                best[metric] = scores

    print("Best_F_Score: %s" % best['f_score'])
    print("Best_Precision: %s" % best['precision'])
    print("Best_Recall: %s" % best['recall'])
def main():
    """Sweep ``user_k`` for the hybrid (tag + LDA) recommender and report scores.

    Command line:
    ``<set_level> <train_prob> <topic_num> <top_n> <recommend_job>`` where
    ``recommend_job`` is one of ``mix_sim``, ``mix_sim_reorder``,
    ``mix_result`` or ``mix_result_reorder``.

    * ``mix_sim*`` jobs call ``hybird_user_sim`` (``theta=0.45``) once and then
      ``recommend`` per ``user_k``, with ``reorder=1`` for the ``_reorder``
      variant and ``reorder=0`` otherwise.
    * ``mix_result*`` jobs load the tag and LDA user similarities separately
      (``norm=1``) and then call ``hybird_recommend_result`` or, for the
      ``_reorder`` variant, ``hybird_result_withReorder`` per ``user_k``.

    Tag distributions are loaded only for the ``_reorder`` jobs; the other
    jobs pass empty dicts.  Logging goes to a per-run file under ``./log``.

    Raises:
        ValueError: if ``recommend_job`` is not one of the four known jobs.
            Previously such a value skipped both the similarity setup and the
            recommendation step while still scoring and printing results.
    """
    args = sys.argv
    set_level = args[1]
    train_prob = args[2]
    topic_num = int(args[3])
    top_n = int(args[4])
    recommend_job = args[5]

    # Reject unknown jobs before any log file is opened or data is loaded.
    known_jobs = ('mix_sim', 'mix_sim_reorder',
                  'mix_result', 'mix_result_reorder')
    if recommend_job not in known_jobs:
        raise ValueError("Unknown recommend_job %r; expected one of: %s"
                         % (recommend_job, ', '.join(known_jobs)))
    mixes_similarity = recommend_job in ('mix_sim', 'mix_sim_reorder')
    reorders = recommend_job in ('mix_result_reorder', 'mix_sim_reorder')

    # Log config
    log_file = './log/hybirdModel_%s_%s_%s_%s_%s.log' % (
        set_level, train_prob, topic_num, recommend_job, top_n)
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s %(funcName)s %(lineno)d %(message)s',
        filename=log_file,
        filemode='w')

    # File locations
    file_template = './song_dataset/user_dataset_%s_%s_%s'  # set_level, type, train_prob
    userTag_sim_file = './song_dataset/mid_data/user_similarity_withTag_%s_%s.json' % (
        set_level, train_prob)
    userLDA_sim_file = './song_dataset/mid_data/user_sim_with_lda_%s_%s_%s.json' % (
        set_level, train_prob, topic_num)
    train_file = file_template % (set_level, 'train', train_prob)
    test_file = file_template % (set_level, 'test', train_prob)

    # Dataset
    dataset = BaseDataSet()
    dataset.build_data(train_file, test_file)
    logging.info("Build dataset cost:%s" % (dataset.cost_time))
    print("DataForTrain: %s" % train_file)
    print("DataForTest: %s" % test_file)
    print("Dataset train_set info: %s" % dataset.get_train_info())
    print("Dataset test_set info: %s" % dataset.get_test_info())

    # Best record seen so far for each metric, seeded with a zero baseline.
    metrics = ('f_score', 'precision', 'recall')
    best = {metric: {metric: 0} for metric in metrics}

    # Tag distributions are only loaded for the reordering jobs.
    items_tag_dict = {}
    users_tag_dict = {}
    if reorders:
        items_tag_dict = load_tag_distribution(
            './song_dataset/mid_data/song_tag_distribution.json')
        user_tag_file = './song_dataset/mid_data/user_tag_distribution_%s_%s.json' % (
            set_level, train_prob)
        users_tag_dict = load_tag_distribution(user_tag_file)

    # Hybrid model setup
    recommender = HybirdModel()
    if mixes_similarity:
        recommender.hybird_user_sim(dataset.train_data, userTag_sim_file,
                                    userLDA_sim_file, theta=0.45)
    else:
        recommender.userTag.load_user_similarity(userTag_sim_file, norm=1)
        recommender.userLda.load_user_similarity(userLDA_sim_file, norm=1)

    # Sweep the neighbourhood size.
    for user_k in [5] + list(range(10, 101, 10)):
        if mixes_similarity:
            recommender.recommend(dataset.train_data, users_tag_dict,
                                  items_tag_dict, user_k, top_n,
                                  reorder=1 if reorders else 0)
        elif reorders:
            recommender.hybird_result_withReorder(dataset.train_data,
                                                  users_tag_dict,
                                                  items_tag_dict,
                                                  user_k, top_n)
        else:
            recommender.hybird_recommend_result(dataset.train_data,
                                                user_k, top_n)
        logging.info("Train_prob:%s User_k:%s Top_n:%s cost:%s",
                     train_prob, user_k, top_n, recommender.cost_time)
        scores = recommender.score(dataset.test_data, len(dataset.all_songs))
        print("User_k:%s\tTop_n:%s\tScores:%s" % (user_k, top_n, scores))

        # A winning score dict is tagged with the parameters that produced it
        # and stored as-is (the same dict may be the best for several metrics).
        for metric in metrics:
            if scores[metric] > best[metric][metric]:
                scores['user_k'] = user_k
                scores['top_n'] = top_n
                best[metric] = scores

    print("Best_F_Score: %s" % best['f_score'])
    print("Best_Precision: %s" % best['precision'])
    print("Best_Recall: %s" % best['recall'])