tr_model = NMF(n_components=K, init='random', random_state=0, max_iter=1000,
               beta_loss=loss, solver='mu')
tr_U_f = tr_model.fit_transform(zi_tr_graph_sparse)  # user features
tr_I_f = tr_model.components_  # item features

# 2. Train on graph_lookup with the item features fixed.
zi_lu_graph = zero_index_sparse_graph(lu_graph, axis=1, convert=lu_I_zero_indexer)
zi_lu_graph = zero_index_sparse_graph(zi_lu_graph, axis=0, convert=lu_U_zero_indexer)
zi_lu_graph_sparse = csr_matrix(
    (zi_lu_graph[1], (zi_lu_graph[0][:, 0], zi_lu_graph[0][:, 1])),
    shape=(n_lu_U, n_lu_I))

lu_model = trainedNMF(components_=tr_I_f[:, lu_I], n_components=K, init='random',
                      random_state=0, max_iter=1000, beta_loss=loss, solver='mu')
ts_U_f = lu_U_f = lu_model.transform(zi_lu_graph_sparse)
ts_I_f = tr_I_f[:, ts_I]
predictions = np.matmul(lu_U_f, ts_I_f)

# 3. Predict on graph_test and evaluate.
zi_ts_graph = zero_index_sparse_graph(ts_graph, axis=0, convert=ts_U_zero_indexer)
zi_ts_graph = zero_index_sparse_graph(zi_ts_graph, axis=1, convert=ts_I_zero_indexer)
topk = torch.topk(torch.tensor(predictions), n_ts_I)

nDCG_score = nDCG(np.r_[0:ts_U.shape[0]], topk[1], zi_ts_graph[0])
m = 20
precision_score = precision_at_m(np.r_[0:ts_U.shape[0]], topk[1], zi_ts_graph[0], m=m)
print("nDCG score is {}".format(np.mean(nDCG_score)))
print("Precision at {} is {}".format(m, np.mean(precision_score)))
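# Aside: `trainedNMF` is a project-local helper that holds the item factors
# fixed while solving for the held-out users' factors. A minimal sketch of the
# same idea using only scikit-learn's public API (an assumed equivalent, not
# the actual helper): fix H and solve for W via non_negative_factorization
# with update_H=False.
from sklearn.decomposition import non_negative_factorization

def transform_with_fixed_items(X, H, loss='frobenius', max_iter=1000):
    """Solve for user factors W with the item factor matrix H held fixed."""
    W, _, _ = non_negative_factorization(
        X, H=H, n_components=H.shape[0], update_H=False,
        solver='mu', beta_loss=loss, max_iter=max_iter)
    return W

# e.g. lu_U_f = transform_with_fixed_items(zi_lu_graph_sparse, tr_I_f[:, lu_I], loss=loss)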
    init_std=0.01, verbose=True, dtype='float32',
    recompute_factors=wmf.recompute_factors_bias)

ts_U = np.unique(test[0][:, 0])
zi_test, test_convert = zero_index_sparse_graph(test)
mask_edges = train[0][np.in1d(train[0][:, 0], ts_U)]
mask_edges[:, 0] = test_convert[mask_edges[:, 0]]

test_user_features = user_features[ts_U]
predictions = np.matmul(test_user_features, item_features.T)

# Zero out training edges so they are never recommended.
# A scatter update might be faster, but the loop is kept for correctness.
for edge in mask_edges:
    predictions[tuple(edge)] = 0.

# Recommend top-k items per user.
topk = torch.topk(torch.tensor(predictions), I.shape[0])[1].numpy()

# Evaluate. TODO: more metrics?
nDCG_score = nDCG(np.r_[0:ts_U.shape[0]], topk, zi_test[0])
m = 20
precision_score = precision_at_m(np.r_[0:ts_U.shape[0]], topk, zi_test[0], m)
print("nDCG score is {}".format(np.mean(nDCG_score)))
print("Precision at {} is {}".format(m, np.mean(precision_score)))
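# Aside: the "scatter update" mentioned above is a single fancy-indexing
# assignment. A small self-contained check that it matches the masking loop:
import numpy as np

_pred = np.ones((3, 4))
_edges = np.array([[0, 1], [2, 3]])
_looped = _pred.copy()
for _e in _edges:
    _looped[tuple(_e)] = 0.
_scattered = _pred.copy()
_scattered[_edges[:, 0], _edges[:, 1]] = 0.  # vectorized equivalent
assert np.array_equal(_looped, _scattered)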
def main(data, p, q, K, M, num_iterations, alpha, lambda_reg, init_std):
    # loc = "/Users/ekansh/repos/data/{}"
    # ds = "nyt/"
    graph = (data[:, :2], data[:, 2])
    graph, _ = zero_index_sparse_graph(graph, axis=0)
    graph, _ = zero_index_sparse_graph(graph, axis=1)

    U = np.unique(graph[0][:, 0])
    nU = U.shape[0]
    I = np.unique(graph[0][:, 1])
    nI = I.shape[0]

    # 1. Split the data: training users/items, lookup items, held-out users.
    tr_graph, lu_graph, ts_graph, tr_U, lu_I = pq_samp_split(graph, p, q)
    tr_U_zero_indexer = zero_index(tr_U, True)
    n_tr_U = tr_U.shape[0]
    tr_I = I
    n_tr_I = nI

    # Lookup and test users are the same held-out users.
    lu_U = ts_U = np.setdiff1d(U, tr_U, assume_unique=True)
    n_lu_U = n_ts_U = lu_U.shape[0]
    lu_U_zero_indexer = ts_U_zero_indexer = zero_index(lu_U, True)

    n_lu_I = lu_I.shape[0]
    lu_I_zero_indexer = zero_index(lu_I, True)
    ts_I = np.setdiff1d(I, lu_I, assume_unique=True)
    n_ts_I = ts_I.shape[0]
    ts_I_zero_indexer = zero_index(ts_I, True)

    # 2. Train WMF on the training graph.
    zi_tr_graph = zero_index_sparse_graph(tr_graph, axis=0, convert=tr_U_zero_indexer)
    zi_tr_graph_sparse = csr_matrix(
        (zi_tr_graph[1], (zi_tr_graph[0][:, 0], zi_tr_graph[0][:, 1])),
        shape=(n_tr_U, n_tr_I))
    S_tr_sparse = wmf.log_surplus_confidence_matrix(zi_tr_graph_sparse,
                                                    alpha=alpha, epsilon=TINY)
    tr_U_f, tr_I_f = wmf.factorize(
        S_tr_sparse, num_factors=K, lambda_reg=lambda_reg,
        num_iterations=num_iterations, init_std=init_std, verbose=True,
        dtype='float32', recompute_factors=wmf.recompute_factors_bias)
    tr_I_f = tr_I_f.T

    # 3. Train on graph_lookup with the item features fixed.
    zi_lu_graph = zero_index_sparse_graph(lu_graph, axis=1, convert=lu_I_zero_indexer)
    zi_lu_graph = zero_index_sparse_graph(zi_lu_graph, axis=0, convert=lu_U_zero_indexer)
    zi_lu_graph_sparse = csr_matrix(
        (zi_lu_graph[1], (zi_lu_graph[0][:, 0], zi_lu_graph[0][:, 1])),
        shape=(n_lu_U, n_lu_I))
    S_lu_sparse = wmf.log_surplus_confidence_matrix(zi_lu_graph_sparse,
                                                    alpha=alpha, epsilon=TINY)
    lu_U_f, _ = wmf.factorize(
        S_lu_sparse, num_factors=K, lambda_reg=lambda_reg,
        num_iterations=num_iterations, init_std=init_std, verbose=True,
        dtype='float32', recompute_factors=wmf.recompute_factors_bias,
        V=tr_I_f[:, lu_I].T)

    # 4. Predict on graph_test and evaluate.
    ts_U_f = lu_U_f
    ts_I_f = tr_I_f[:, ts_I]
    predictions = np.matmul(ts_U_f, ts_I_f)
    zi_ts_graph = zero_index_sparse_graph(ts_graph, axis=0, convert=ts_U_zero_indexer)
    zi_ts_graph = zero_index_sparse_graph(zi_ts_graph, axis=1, convert=ts_I_zero_indexer)
    topk = torch.topk(torch.tensor(predictions), n_ts_I)

    nDCG_score = nDCG(np.r_[0:ts_U.shape[0]], topk[1], zi_ts_graph[0])
    precision_score = precision_at_m(np.r_[0:ts_U.shape[0]], topk[1], zi_ts_graph[0], m=M)
    print("nDCG score is {}".format(np.mean(nDCG_score)))
    print("Precision at {} is {}".format(M, np.mean(precision_score)))
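# Hypothetical invocation of the cold-start pipeline above. Assumes `data` is
# an (n_edges, 3) integer array of (user, item, count) triples; the file name
# and every hyperparameter value below are illustrative placeholders, not
# values from the original experiments.
if __name__ == '__main__':
    data = np.loadtxt('interactions.tsv', dtype=int)  # hypothetical input file
    main(data, p=0.8, q=0.5, K=10, M=20, num_iterations=20,
         alpha=2.0, lambda_reg=1e-5, init_std=0.01)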
def main(data, p, K, M, num_iterations, alpha, lambda_reg, init_std):
    graph = (data[:, :2], data[:, 2])
    graph, _ = zero_index_sparse_graph(graph, axis=0)
    graph, _ = zero_index_sparse_graph(graph, axis=1)

    U = np.unique(graph[0][:, 0])
    nU = U.shape[0]
    I = np.unique(graph[0][:, 1])
    nI = I.shape[0]

    # Split the edges into train and test.
    train, test = edge_samp_split(graph, p)
    train_sparse = csr_matrix((train[1], (train[0][:, 0], train[0][:, 1])),
                              shape=(nU, nI))

    # Train WMF on the training edges.
    S = wmf.log_surplus_confidence_matrix(train_sparse, alpha=alpha, epsilon=TINY)
    user_features, item_features = wmf.factorize(
        S, num_factors=K, lambda_reg=lambda_reg, num_iterations=num_iterations,
        init_std=init_std, verbose=True, dtype='float32',
        recompute_factors=wmf.recompute_factors_bias)

    ts_U = np.unique(test[0][:, 0])
    zi_test, test_convert = zero_index_sparse_graph(test)
    mask_edges = train[0][np.in1d(train[0][:, 0], ts_U)]
    mask_edges[:, 0] = test_convert[mask_edges[:, 0]]

    test_user_features = user_features[ts_U]
    predictions = np.matmul(test_user_features, item_features.T)

    # Zero out training edges so they are never recommended.
    # A scatter update might be faster, but the loop is kept for correctness.
    for edge in mask_edges:
        predictions[tuple(edge)] = 0.

    # Recommend top-k items per user.
    topk = torch.topk(torch.tensor(predictions), I.shape[0])[1].numpy()

    # Evaluate. TODO: more metrics?
    nDCG_score = nDCG(np.r_[0:ts_U.shape[0]], topk, zi_test[0])
    precision_score = precision_at_m(np.r_[0:ts_U.shape[0]], topk, zi_test[0], M)
    print("nDCG score is {}".format(np.mean(nDCG_score)))
    print("Precision at {} is {}".format(M, np.mean(precision_score)))
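# Aside: torch is used above only to rank every item for every user; a
# pure-NumPy equivalent is a descending argsort (tie order may differ).
# A small self-contained check of the equivalence:
import numpy as np
import torch

_pred = np.random.rand(5, 7)
_torch_rank = torch.topk(torch.tensor(_pred), _pred.shape[1])[1].numpy()
_numpy_rank = np.argsort(-_pred, axis=1)
assert np.array_equal(_torch_rank, _numpy_rank)  # holds when scores are distinct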