Example #1
import numpy as np
import torch
from scipy.sparse import csr_matrix
from sklearn.decomposition import NMF

# 1. Train NMF on graph_train
tr_model = NMF(n_components=K, init='random', random_state=0, max_iter=1000, beta_loss=loss, solver='mu')
tr_U_f = tr_model.fit_transform(zi_tr_graph_sparse)  # user features
tr_I_f = tr_model.components_  # item features

# 2. Train the lookup model on graph_lookup with the item features fixed
zi_lu_graph = zero_index_sparse_graph(lu_graph, axis=1, convert=lu_I_zero_indexer)
zi_lu_graph = zero_index_sparse_graph(zi_lu_graph, axis=0, convert=lu_U_zero_indexer)
zi_lu_graph_sparse = csr_matrix((zi_lu_graph[1], (zi_lu_graph[0][:, 0], zi_lu_graph[0][:, 1])), shape=(n_lu_U, n_lu_I))

lu_model = trainedNMF(components_=tr_I_f[:, lu_I], n_components=K, init='random', random_state=0, max_iter=1000,
                      beta_loss=loss, solver='mu')

ts_U_f = lu_U_f = lu_model.transform(zi_lu_graph_sparse)
ts_I_f = tr_I_f[:, ts_I]
predictions = np.matmul(lu_U_f, ts_I_f)

zi_ts_graph = zero_index_sparse_graph(ts_graph, axis=0, convert=ts_U_zero_indexer)
zi_ts_graph = zero_index_sparse_graph(zi_ts_graph, axis=1, convert=ts_I_zero_indexer)
# 3. Predict on graph_test and evaluate
topk = torch.topk(torch.tensor(predictions), n_ts_I)

nDCG_score = nDCG(np.r_[0:ts_U.shape[0]], topk[1], zi_ts_graph[0])
m = 20
precision_score = precision_at_m(np.r_[0:ts_U.shape[0]], topk[1], zi_ts_graph[0], m=m)

print("nDCG Score for is {}".format(np.mean(nDCG_score)))
print("Precision at {} is {}".format(m, np.mean(precision_score)))
Example #2
user_features, item_features = wmf.factorize(S, num_factors=K, lambda_reg=lambda_reg,
                                             num_iterations=num_iterations,
                                             init_std=0.01, verbose=True, dtype='float32',
                                             recompute_factors=wmf.recompute_factors_bias)

ts_U = np.unique(test[0][:, 0])
zi_test, test_convert = zero_index_sparse_graph(test)
mask_edges = train[0][np.in1d(train[0][:, 0], ts_U)]
mask_edges[:, 0] = test_convert[mask_edges[:, 0]]

test_user_features = user_features[ts_U]
predictions = np.matmul(test_user_features, item_features.T)

# Mask the training edges of test users so they are not recommended again.
# A scatter update might be faster, but the loop is simple and correct.
for edge in mask_edges:
    predictions[tuple(edge)] = 0.

# Recommend top_k

topk = torch.topk(torch.tensor(predictions), I.shape[0])[1].numpy()

# Evaluate (more metrics could be added)
nDCG_score = nDCG(np.r_[0:ts_U.shape[0]], topk, zi_test[0])

m = 20
precision_score = precision_at_m(np.r_[0:ts_U.shape[0]], topk, zi_test[0], m)

print("nDCG Score is {}".format(np.mean(nDCG_score)))
print("Precision at {} is {}".format(m, np.mean(precision_score)))

Example #3
def main(data, p, q, K, M, num_iterations, alpha, lambda_reg, init_std):
    # loc = "/Users/ekansh/repos/data/{}"
    # ds = "nyt/"
    # loss = 'kullback-leibler'
    # # loss = "frobenius"

    graph = (data[:, :2], data[:, 2])

    graph, _ = zero_index_sparse_graph(graph, axis=0)
    graph, _ = zero_index_sparse_graph(graph, axis=1)

    U = np.unique(graph[0][:, 0])
    nU = U.shape[0]

    I = np.unique(graph[0][:, 1])
    nI = I.shape[0]

    # Split data into train / lookup / test
    tr_graph, lu_graph, ts_graph, tr_U, lu_I = pq_samp_split(graph, p, q)

    # Training users: the sampled tr_U; they are factorized against all items.
    n_tr_U = tr_U.shape[0]
    tr_U_zero_indexer = zero_index(tr_U, True)
    tr_I = I
    n_tr_I = nI

    # Held-out users are shared between the lookup and test graphs.
    lu_U = ts_U = np.setdiff1d(U, tr_U, assume_unique=True)
    n_lu_U = n_ts_U = lu_U.shape[0]
    lu_U_zero_indexer = ts_U_zero_indexer = zero_index(lu_U, True)

    # Lookup items come from the split; the remaining items are for testing.
    n_lu_I = lu_I.shape[0]
    lu_I_zero_indexer = zero_index(lu_I, True)
    ts_I = np.setdiff1d(I, lu_I, assume_unique=True)
    n_ts_I = ts_I.shape[0]
    ts_I_zero_indexer = zero_index(ts_I, True)

    # 1. Train WMF on graph_train
    zi_tr_graph = zero_index_sparse_graph(tr_graph, axis=0, convert=tr_U_zero_indexer)
    zi_tr_graph_sparse = csr_matrix((zi_tr_graph[1], (zi_tr_graph[0][:, 0], zi_tr_graph[0][:, 1])), shape=(n_tr_U, n_tr_I))


    S_tr_sparse = wmf.log_surplus_confidence_matrix(zi_tr_graph_sparse, alpha=alpha, epsilon=TINY)
    tr_U_f, tr_I_f = wmf.factorize(S_tr_sparse, num_factors=K, lambda_reg=lambda_reg, num_iterations=num_iterations, 
                                                init_std=init_std, verbose=True, dtype='float32', 
                                                recompute_factors=wmf.recompute_factors_bias)
    tr_I_f = tr_I_f.T
    # 2. Train the lookup model on graph_lookup with the item features fixed
    zi_lu_graph = zero_index_sparse_graph(lu_graph, axis=1, convert=lu_I_zero_indexer)
    zi_lu_graph = zero_index_sparse_graph(zi_lu_graph, axis=0, convert=lu_U_zero_indexer)
    zi_lu_graph_sparse = csr_matrix((zi_lu_graph[1], (zi_lu_graph[0][:, 0], zi_lu_graph[0][:, 1])), shape=(n_lu_U, n_lu_I))

    S_lu_sparse = wmf.log_surplus_confidence_matrix(zi_lu_graph_sparse, alpha=alpha, epsilon=TINY)
    lu_U_f, _ = wmf.factorize(S_lu_sparse, num_factors=K, lambda_reg=lambda_reg, num_iterations=num_iterations,
                              init_std=init_std, verbose=True, dtype='float32',
                              recompute_factors=wmf.recompute_factors_bias, V=tr_I_f[:, lu_I].T)

    ts_U_f = lu_U_f 
    ts_I_f = tr_I_f[:, ts_I]
    predictions = np.matmul(ts_U_f, ts_I_f)

    zi_ts_graph = zero_index_sparse_graph(ts_graph, axis=0, convert=ts_U_zero_indexer)
    zi_ts_graph = zero_index_sparse_graph(zi_ts_graph, axis=1, convert=ts_I_zero_indexer)
    # 3. Predict on graph_test and evaluate
    topk = torch.topk(torch.tensor(predictions), n_ts_I)

    nDCG_score = nDCG(np.r_[0:ts_U.shape[0]], topk[1], zi_ts_graph[0])
    precision_score = precision_at_m(np.r_[0:ts_U.shape[0]], topk[1], zi_ts_graph[0], m=M)

    print("nDCG Score for is {}".format(np.mean(nDCG_score)))
    print("Precision at {} is {}".format(M, np.mean(precision_score)))
    pass
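nDCG and precision_at_m are also project-local and not defined on this page. Assuming they take the evaluated user indices, a per-user ranking (item indices sorted by predicted score), and the zero-indexed positive edges, and return one score per user, a minimal binary-relevance sketch could look like:

import numpy as np

def _true_items(users, edges):
    """Held-out positive items per user, from a zero-indexed edge array."""
    return {u: set(edges[edges[:, 0] == u, 1].tolist()) for u in users}

def precision_at_m(users, ranking, edges, m=20):
    """Fraction of the top-m ranked items that are held-out positives."""
    truth = _true_items(users, edges)
    ranking = np.asarray(ranking)
    return np.array([len(truth[u] & set(ranking[u, :m].tolist())) / m
                     for u in users])

def nDCG(users, ranking, edges):
    """Binary-relevance nDCG of the full ranking, one score per user."""
    truth = _true_items(users, edges)
    ranking = np.asarray(ranking)
    discounts = 1.0 / np.log2(np.arange(2, ranking.shape[1] + 2))
    scores = []
    for u in users:
        rel = np.isin(ranking[u], list(truth[u])).astype(float)
        ideal = discounts[:len(truth[u])].sum()
        scores.append((rel * discounts).sum() / ideal if ideal > 0 else 0.0)
    return np.array(scores)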
Example #4
def main(data, p, K, M, num_iterations, alpha, lambda_reg, init_std):
    # loc = "/Users/ekansh/repos/data/{}"
    # ds = "nyt/"
    # loss = 'kullback-leibler'
    # # loss = "frobenius"

    graph = (data[:, :2], data[:, 2])

    graph, _ = zero_index_sparse_graph(graph, axis=0)
    graph, _ = zero_index_sparse_graph(graph, axis=1)

    U = np.unique(graph[0][:, 0])
    nU = U.shape[0]

    I = np.unique(graph[0][:, 1])
    nI = I.shape[0]

    # Split data
    train, test = edge_samp_split(graph, p)
    train_sparse = csr_matrix((train[1], (train[0][:, 0], train[0][:, 1])),
                              shape=(nU, nI))

    ## Train WMF
    S = wmf.log_surplus_confidence_matrix(train_sparse,
                                          alpha=alpha,
                                          epsilon=TINY)

    user_features, item_features = wmf.factorize(
        S,
        num_factors=K,
        lambda_reg=lambda_reg,
        num_iterations=num_iterations,
        init_std=init_std,
        verbose=True,
        dtype='float32',
        recompute_factors=wmf.recompute_factors_bias)

    ts_U = np.unique(test[0][:, 0])
    zi_test, test_convert = zero_index_sparse_graph(test)
    mask_edges = train[0][np.in1d(train[0][:, 0], ts_U)]
    mask_edges[:, 0] = test_convert[mask_edges[:, 0]]

    test_user_features = user_features[ts_U]
    predictions = np.matmul(test_user_features, item_features.T)

    # Mask the training edges of test users so they are not recommended again.
    # A scatter update might be faster, but the loop is simple and correct.
    for edge in mask_edges:
        predictions[tuple(edge)] = 0.

    # Recommend top_k

    topk = torch.topk(torch.tensor(predictions), I.shape[0])[1].numpy()

    # Evaluate: More metrics?
    nDCG_score = nDCG(np.r_[0:ts_U.shape[0]], topk, zi_test[0])
    precision_score = precision_at_m(np.r_[0:ts_U.shape[0]], topk, zi_test[0], M)

    print("nDCG Score for is {}".format(np.mean(nDCG_score)))
    print("Precision at {} is {}".format(m, np.mean(precision_score)))
    pass
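The wmf module is not shown here; judging by the names, it is a weighted matrix factorization implementation in the style of Hu, Koren & Volinsky's implicit-feedback ALS, and log_surplus_confidence_matrix applies their log confidence weighting. A sketch of that transform, under the assumption that it stores only the surplus C - 1 so that unobserved entries stay zero in the sparse matrix:

import numpy as np
from scipy.sparse import csr_matrix

def log_surplus_confidence_matrix(B, alpha, epsilon):
    """Log confidence from Hu et al. (2008): C = 1 + alpha * log(1 + r/eps).
    Storing only the surplus S = C - 1 keeps the matrix sparse, since
    unobserved (zero) entries map to zero surplus."""
    S = B.copy()
    S.data = alpha * np.log(1.0 + S.data / epsilon)
    return S

# Toy usage with a 2x2 interaction matrix:
B = csr_matrix(np.array([[0., 3.], [1., 0.]]))
S = log_surplus_confidence_matrix(B, alpha=2.0, epsilon=1e-6)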