# Cold-start experiment driver: builds the list of recommenders, runs
# experiment_coldstart_map over them and reports the mean RMSE per method.

# Optional command-line overrides: argv[1] -> leave_k_out, argv[2] -> lafactor.
# Presumably both names already hold defaults assigned earlier in the script
# (not visible in this excerpt).
# NOTE(review): the messages say "default" although they sit on the override
# path, and the flattened source does not show whether each print belongs
# inside its `if` -- confirm the intended nesting and wording.
if len(sys.argv) > 1:
    leave_k_out = int(sys.argv[1])
    print('Use default leave k out: k=' + str(leave_k_out))

if len(sys.argv) > 2:
    lafactor = int(sys.argv[2])
    print('Use default latent factor: ' + str(lafactor))

max_rank = 2000

# number of repetitions.
total_iteration = 2

# recommendation algorithms.
# RandUV(latent_factor=lafactor) is commented out of this run.
method_list = [
    HierLat(latent_factor=lafactor, cold_start=HierLat.CS_EQUAL_PROB),
    LMaFit(latent_factor=lafactor),
    NMF(latent_factor=lafactor),
    PMF(latent_factor=lafactor),
    TriUHV(latent_factor=lafactor),
]

# main method.
result = experiment_coldstart_map(
    exp_name, daily_data_file,
    min_occ_user, min_occ_prog, num_user, num_prog,
    method_list, leave_k_out, total_iteration, max_rank, binary=False)

# result maps a method name to a sequence of per-iteration metric dicts.
matlab_output = {}
for method_name, method_iter_perf in result.items():
    print('Method: ' + method_name)
    # mean of the 'RMSE' entries over this method's iterations.
    rmse = sum(x['RMSE'] for x in method_iter_perf) / len(method_iter_perf)
    print('>>Average RMSE : %.5f' % rmse)
    # zero vector with one slot per element of the first iteration's 'recall'.
    perf_recall = np.zeros(len(method_iter_perf[0]['recall']))
# Leave-k-out experiment driver: runs every recommender in method_list through
# experiment_leave_k_out and prints the per-method mean precision, recall and
# rmse.
exp_name = 'test_exp_mid_prec_rec'  # something meaningful.

# filtering criteria
min_occ_user = 35
min_occ_prog = 300

top_n = 50        # performance computed on top N
leave_k_out = 10  # perform leave k out.

# number of repetitions.
total_iteration = 3

# recommendation algorithms (lafactor is defined outside this excerpt).
method_list = [
    LMaFit(latent_factor=lafactor),
    RandUV(latent_factor=lafactor),
    HierLat(latent_factor=lafactor),
    NMF(latent_factor=lafactor),
    PMF(latent_factor=lafactor),
    TriUHV(latent_factor=lafactor),
]

# main method.
result = experiment_leave_k_out(
    exp_name, daily_data_file, min_occ_user, min_occ_prog,
    method_list, leave_k_out, total_iteration, top_n)

# display results (average precision, recall and rmse per method).
# result maps a method name to a sequence of per-iteration metric dicts.
for method_name, method_iter_perf in result.items():
    print('Method: ' + method_name)
    print('>>Average precision : %.5f'
          % (sum(x['prec'] for x in method_iter_perf) / len(method_iter_perf)))
    print('>>Average recall : %.5f'
          % (sum(x['recall'] for x in method_iter_perf) / len(method_iter_perf)))
    print('>>Average rmse : %.5f'
          % (sum(x['rmse'] for x in method_iter_perf) / len(method_iter_perf)))
    # debugging aid: print(method_iter_perf)
# debugging aid: print(result)
# Loads the feedback data, row-normalizes it and trains a single HierLat model
# with simplex projection enabled.

# Alternative filter thresholds kept for reference:
# feedback_data = reader.read_file_with_minval(filename, 25, 300)
feedback_data = reader.read_file_with_minval(filename, 35, 300)
print(feedback_data)

print('Maximum Genre.')
# NOTE(review): the +1 presumably turns a 0-based max genre id into a count --
# confirm against the layout of meta['pggr_gr'].
print(np.max(feedback_data.meta['pggr_gr']) + 1)

print('Normalizing data.')
feedback_data.normalize_row()

# build model with r latent factors.
r = 5
# the L_2 norm regularizer
lamb = 0.001
# the stopping delta value
delta = 0.01
# the maximum iteration number
maxiter = 500

HierLat_model = HierLat(r, lamb, delta, maxiter, verbose=True)
# Variant without simplex projection:
# HierLat_model.train(feedback_data, simplex_projection=False)
HierLat_model.train(feedback_data, simplex_projection=True)

# Disabled prediction test (was held in a bare string literal):
# loc_row = [200, 4, 105]
# loc_col = [10, 22, 4]
# print('Prediction:')
# print(HierLat_model.predict(loc_row, loc_col))
# Builds the identifiers for one leave-k-out run: an experiment id derived
# from the data files and filter settings, and a per-method, per-iteration
# result resource name.

# filtering criteria.
min_occ_user = 50
min_occ_prog = 1000
num_user = 10000
num_prog = 3000

total_iteration = 2
iteration = 1  # iteration out of total_iteration.

leave_k_out = 20
lafactor = 5

method = HierLat(latent_factor=lafactor)

# Fingerprint of the input file list, embedded in the experiment id.
# NOTE(review): if daily_data_file holds strings, hash() is salted per process
# on Python 3.3+ and the id would change between runs there -- confirm before
# porting.
hash_file_str = str(hash(tuple(daily_data_file)))

reader = DailyWatchTimeReader()
feedback_data = reader.read_file_with_minval(
    daily_data_file, min_occ_user, min_occ_prog, num_user, num_prog)

# Experiment id: tag/value pairs concatenated in a fixed order.
exp_id = ''.join([
    'lko_bi_', exp_name,
    '_data', hash_file_str,
    '_mu', str(min_occ_user),
    '_mp', str(min_occ_prog),
    '_nu', str(num_user),
    '_np', str(num_prog),
    '_k', str(leave_k_out),
    '_toiter', str(total_iteration),
])

# Resource name for this method's result at this iteration.
result_resource_str = ''.join([
    'exp', exp_id,
    '_method', method.unique_str(),
    '_iter', str(iteration),
])
# Builds the identifiers for one leave-k-out run: an experiment id derived
# from the data files and filter settings, a per-method, per-iteration result
# resource name, and the sub folder where the method's resources are stored.

# filtering criteria.
min_occ_user = 50
min_occ_prog = 1000
num_user = 10000
num_prog = 3000

total_iteration = 2
iteration = 1  # iteration out of total_iteration.

leave_k_out = 20
lafactor = 5

method = HierLat(latent_factor=lafactor)

# Fingerprint of the input file list, embedded in the experiment id.
# NOTE(review): if daily_data_file holds strings, hash() is salted per process
# on Python 3.3+ and the id would change between runs there -- confirm before
# porting.
hash_file_str = str(hash(tuple(daily_data_file)))

reader = DailyWatchTimeReader()
feedback_data = reader.read_file_with_minval(
    daily_data_file, min_occ_user, min_occ_prog, num_user, num_prog)

# Experiment id: tag/value pairs concatenated in a fixed order.
exp_id = ''.join([
    'lko_bi_', exp_name,
    '_data', hash_file_str,
    '_mu', str(min_occ_user),
    '_mp', str(min_occ_prog),
    '_nu', str(num_user),
    '_np', str(num_prog),
    '_k', str(leave_k_out),
    '_toiter', str(total_iteration),
])

# Resource name for this method's result at this iteration.
result_resource_str = ''.join([
    'exp', exp_id,
    '_method', method.unique_str(),
    '_iter', str(iteration),
])

# use a sub folder to store the experiment resource.
sub_folder = '/'.join([exp_id, 'models', method.unique_str()])
# Leave-k-out experiment driver with loose filters: runs every recommender in
# method_list through experiment_leave_k_out and prints the per-method mean
# precision, recall and rmse.

# filtering criteria
min_occ_user = 4
min_occ_prog = 1

top_n = 15       # performance computed on top N
leave_k_out = 1  # perform leave k out.

# number of repetitions.
total_iteration = 3

# latent factor shared by every algorithm below.
lf = 5

# recommendation algorithms
method_list = [
    LMaFit(latent_factor=lf),
    RandUV(latent_factor=lf),
    HierLat(latent_factor=lf),
    NMF(latent_factor=lf),
]

# main method.
result = experiment_leave_k_out(
    exp_name, daily_data_file, min_occ_user, min_occ_prog,
    method_list, leave_k_out, total_iteration, top_n)

# display results (average precision, recall and rmse per method).
# result maps a method name to a sequence of per-iteration metric dicts.
for method_name, method_iter_perf in result.items():
    print('Method: ' + method_name)
    print('>>Average precision : %.5f'
          % (sum(x['prec'] for x in method_iter_perf) / len(method_iter_perf)))
    print('>>Average recall : %.5f'
          % (sum(x['recall'] for x in method_iter_perf) / len(method_iter_perf)))
    print('>>Average rmse : %.5f'
          % (sum(x['rmse'] for x in method_iter_perf) / len(method_iter_perf)))
    # debugging aid: print(method_iter_perf)
# debugging aid: print(result)
# Random-split experiment driver: runs every recommender in method_list
# through experiment_rand_split and prints the per-method mean RMSE.

# filtering criteria
min_occ_user = 35
min_occ_prog = 300

# share of the data used for training; the remaining (1 - training_prec)
# is used for testing.
training_prec = 0.5

# number of repetitions.
total_iteration = 5

# latent factor
lf = 10

# recommendation algorithms
method_list = [
    LMaFit(latent_factor=lf),
    RandUV(latent_factor=lf),
    HierLat(latent_factor=lf),
]

# main method.
result = experiment_rand_split(
    exp_name, daily_data_file, min_occ_user, min_occ_prog,
    method_list, training_prec, total_iteration)

# display results (average RMSE).
# result maps a method name to a sequence of per-iteration numeric scores.
for method_name, method_iter_perf in result.items():
    print('Method: ' + method_name)
    print('>>Average performance RMSE: %.5f'
          % (sum(method_iter_perf) / len(method_iter_perf)))
# debugging aid: print(result)