def main(train_file, user_item_side_information_file, hierarchy_file, test_file):
    A = tsv_to_matrix(train_file)
    B = tsv_to_matrix(user_item_side_information_file)
    hierarchy = json.loads(open(hierarchy_file).read())

    W = slim_train(A)

    ## LDA
    #lda = LDAHierarquical(B, hierarchy, topics=20)
    #recommendations_lda = slim_lda_recommender(A, W, lda)
    #compute_precision(recommendations_lda, test_file)
    ###

    recommendations_slim = slim_recommender(A, W)

    ## HSLIM
    from hslim import handle_user_bias, hierarchy_factory, normalize_wline, generate_subitem_hierarchy
    hierarchy = hierarchy_factory(hierarchy_file)
    K = slim_train(handle_user_bias(B))
    Wline = generate_subitem_hierarchy(K, W, hierarchy)
    WlineNorm = normalize_wline(Wline)
    recommendations_other = slim_recommender(A, WlineNorm)
    ###

    # Compare the two rankings user by user: Kendall tau measures how
    # differently they order items, the set difference how many items differ.
    kendall_tau_values = []
    differences_values = []
    for u in recommendations_slim.iterkeys():
        ranking_slim = recommendations_slim[u][:RANKING_UNTIL]
        ranking_other = recommendations_other[u][:RANKING_UNTIL]
        kendall_tau_values.append(kendalltau(ranking_slim, ranking_other))
        differences_values.append(RANKING_UNTIL - len(set(ranking_slim) & set(ranking_other)))

    # Differences
    plt.hist(differences_values)
    plt.xlabel('Size of difference')
    plt.ylabel('Amount of rankings')
    plt.title('Differences (novelty) between rankings')
    show_matplot_fig()

    # Ranking comparison
    plt.figure()
    plt.hist([i[0] for i in kendall_tau_values])
    plt.xlabel('Kendall tau distance SLIM/HSLIM')
    plt.ylabel('Number of occurrences')
    plt.title('Comparison between rankings')
    show_matplot_fig()
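
# Illustrative aside (not project code): scipy.stats.kendalltau returns a
# (correlation, p-value) pair, which is why the histogram above plots i[0]
# for each entry of kendall_tau_values. Toy rankings with made-up item ids:
from scipy.stats import kendalltau

ranking_a = [10, 4, 7, 2, 9]
ranking_b = [10, 7, 4, 2, 9]
tau, p_value = kendalltau(ranking_a, ranking_b)  # tau is 1.0 for identical orderings, -1.0 for reversed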
def main(train_file, user_item_side_information_file, hierarchy_file, test_file):
    A = tsv_to_matrix(train_file)
    B = tsv_to_matrix(user_item_side_information_file)
    hierarchy = json.loads(open(hierarchy_file).read())

    lda = LDAHierarquical(B, hierarchy, topics=15)

    ##### REMOVE_IT: ad-hoc check of how well user topics match test-city topics
    def important_topics(x, topics):
        if not x:
            return x
        # Sort by weight in descending order so the highest-weight
        # (most important) topics come first.
        transf = sorted(enumerate(x), key=lambda t: t[1], reverse=True)
        return [i[0] for i in transf[:topics]]

    topics = 3
    coincidencias = []
    T = tsv_to_matrix(test_file)
    for user in range(1, 101):
        # The user's top topics
        user_topics = important_topics(lda.model['users'][user], topics)

        # Top topics of the user's test cities
        cities = T[user].nonzero()[0]
        cities_topics = [important_topics(lda.model['cities'].get(city, []), topics)
                         for city in cities]

        total = 0
        topics_compared = 0
        coinc = 0
        for city_topic in cities_topics:
            if city_topic:
                coinc += len(set(user_topics) & set(city_topic))
                topics_compared += len(user_topics)
                total += 1

        if total:
            perc = coinc / float(topics_compared)
        else:
            perc = -1

        coincidencias.append([coinc, topics_compared, perc])

    with open('/tmp/coincidencias.json', 'w') as f:
        f.write(json.dumps(coincidencias))
    #####

    W = slim_train(A)
    recommendations = slim_lda_recommender(A, W, lda)
    compute_precision(recommendations, test_file)
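
# Illustrative aside (not project code): the overlap fraction computed in the
# REMOVE_IT block above, on made-up topic lists. coinc counts topics shared
# between the user and each test city; topics_compared counts the slots compared.
user_topics = [1, 5, 9]                 # top-3 topics of one user
cities_topics = [[1, 2, 9], [5, 0, 3]]  # top-3 topics of two test cities
coinc = sum(len(set(user_topics) & set(ct)) for ct in cities_topics)
topics_compared = len(user_topics) * len(cities_topics)
overlap_fraction = coinc / float(topics_compared)  # (2 + 1) / 6.0 = 0.5 here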
def main(train_file, test_file):
    A = tsv_to_matrix(train_file)
    W = slim_train(A)
    recommendations = slim_recommender(A, W)
    return compute_precision_as_an_oracle(recommendations, test_file)
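
# A minimal sketch of the scoring step slim_recommender wraps, assuming dense
# numpy arrays; sketch_recommend and top_n are illustrative names, not the
# project's API. SLIM predicts scores as A_hat = A . W, where W is the learned
# item-item coefficient matrix, masking out items the user already has.
import numpy as np

def sketch_recommend(A, W, top_n=10):
    scores = A.dot(W)              # predicted user-item scores
    scores[A.nonzero()] = -np.inf  # never re-recommend known items
    return np.argsort(-scores, axis=1)[:, :top_n]  # top-n item indices per user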
def main(train_file, user_sideinformation_file, hierarchy_file, test_file):
    A = tsv_to_matrix(train_file)
    B = tsv_to_matrix(user_sideinformation_file, A.shape[0], A.shape[1])
    hierarchy = hierarchy_factory(hierarchy_file)

    # Learning using SLIM.
    # We correct for user bias only in B, because B holds explicit ratings.
    K = slim_train(handle_user_bias(B))
    W = slim_train(A)

    Wline = generate_subitem_hierarchy(K, W, hierarchy)
    WlineNorm = normalize_wline(Wline)

    #recommendations = slim_recommender(A, W + 0.2 * WlineNorm)
    recommendations = slim_recommender(A, WlineNorm)

    # Leakage check: the intersection of a user's test cities with the
    # cities already known from B must be empty.
    #user_cities = np.array([map(hierarchy, B[i].nonzero()[0].tolist()) for i in range(B.shape[0])])
    #G = tsv_to_matrix(test_file)
    #print 'MUST BE EMPTY: ', set(G[1].nonzero()[0]) & set(user_cities[1])

    compute_precision(recommendations, test_file)
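
# A guess at the hierarchy_factory contract, inferred from its use above
# (called on a JSON path, with the result applied to item ids via map):
# it loads a child -> parent mapping and returns it as a callable. This is
# an assumption, not the project's implementation.
import json

def sketch_hierarchy_factory(path):
    with open(path) as f:
        child_to_parent = json.load(f)  # e.g. {"42": "7", ...}
    return lambda item_id: child_to_parent[str(item_id)]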