Exemplo n.º 1
0
def test_predict_ranks():
    """Exercise ``predict_rank`` on a model fitted to an empty matrix.

    Three things are checked: ranks of a fully dense query form a
    permutation of ``0 .. no_items - 1`` per user, ranks are all zero
    once every model parameter is zeroed (all predictions tie), and a
    query of the wrong shape raises ``ValueError``.
    """

    no_users, no_items = 10, 100
    shape = (no_users, no_items)

    # Interaction matrix of the right shape with no stored entries.
    model = LightFM()
    model.fit_partial(sp.coo_matrix(shape, dtype=np.float32))

    # Ask for the rank of every (user, item) pair.
    every_pair = sp.csr_matrix(np.ones(shape))
    ranks = model.predict_rank(every_pair, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    # Each user's ranks, once sorted, must be exactly 0 .. no_items - 1.
    expected_sorted = np.arange(no_items)
    for user in range(no_users):
        assert np.all(np.sort(ranks[user]) == expected_sorted)

    # Zero every parameter so that all predictions are tied.
    for attr in ("user_embeddings",
                 "item_embeddings",
                 "user_biases",
                 "item_biases"):
        setattr(model, attr, np.zeros_like(getattr(model, attr)))

    tied_ranks = model.predict_rank(every_pair, num_threads=2).todense()

    assert np.all(tied_ranks.min(axis=1) == 0)
    assert np.all(tied_ranks.max(axis=1) == 0)

    # A query whose shape does not match the model must be rejected.
    bad_query = sp.csr_matrix((5, 5))
    with pytest.raises(ValueError):
        model.predict_rank(bad_query, num_threads=2)
Exemplo n.º 2
0
def test_predict_ranks():
    """Exercise ``predict_rank`` including train-set exclusions and ties.

    Checks, in order:

    * ranks of a fully dense query are a permutation of
      ``0 .. no_items - 1`` for every user;
    * passing the dense query as ``train_interactions`` yields all-zero
      ranks;
    * with the real training matrix as ``train_interactions`` the
      per-user maximum rank drops by that user's number of training
      entries;
    * with every model parameter zeroed (all predictions tied) every
      item receives the maximum rank ``no_items - 1``;
    * a query of the wrong shape raises ``ValueError``.
    """

    no_users, no_items = (10, 100)

    # Seeded so the sparse training matrix is identical on every run.
    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute ranks for all items
    rank_input = sp.csr_matrix(np.ones((no_users, no_items)))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    for row in range(no_users):
        assert np.all(np.sort(ranks[row]) == np.arange(no_items))

    # Train set exclusions. All ranks should be zero
    # if train interactions is dense.
    ranks = model.predict_rank(rank_input,
                               train_interactions=rank_input).todense()
    assert np.all(ranks == 0)

    # Max rank should be num_items - 1 - number of positives
    # in train in that row
    ranks = model.predict_rank(rank_input,
                               train_interactions=train).todense()
    assert np.all(np.squeeze(np.array(ranks.max(axis=1))) ==
                  no_items - 1 - np.squeeze(np.array(train.getnnz(axis=1))))

    # Make sure ranks are computed pessimistically when
    # there are ties (that is, equal predictions for every
    # item will assign maximum rank to each).
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    # Derived from no_items so the check follows the item count.
    max_rank = no_items - 1
    assert np.all(ranks.min(axis=1) == max_rank)
    assert np.all(ranks.max(axis=1) == max_rank)

    # Wrong input dimensions
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
Exemplo n.º 3
0
def test_predict_ranks():
    """Exercise ``predict_rank`` including train-set exclusions and ties.

    Checks, in order:

    * ranks of a fully dense query are a permutation of
      ``0 .. no_items - 1`` for every user;
    * passing the dense query as ``train_interactions`` yields all-zero
      ranks;
    * with the real training matrix as ``train_interactions`` the
      per-user maximum rank drops by that user's number of training
      entries;
    * with every model parameter zeroed (all predictions tied) every
      rank is zero;
    * a query of the wrong shape raises ``ValueError``.
    """

    no_users, no_items = (10, 100)

    # Seeded so the sparse training matrix is identical on every run;
    # an unseeded matrix makes failures impossible to reproduce.
    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute ranks for all items
    rank_input = sp.csr_matrix(np.ones((no_users, no_items)))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    for row in range(no_users):
        assert np.all(np.sort(ranks[row]) == np.arange(no_items))

    # Train set exclusions. All ranks should be zero
    # if train interactions is dense.
    ranks = model.predict_rank(rank_input,
                               train_interactions=rank_input).todense()
    assert np.all(ranks == 0)

    # Max rank should be num_items - 1 - number of positives
    # in train in that row
    ranks = model.predict_rank(rank_input,
                               train_interactions=train).todense()
    assert np.all(np.squeeze(np.array(ranks.max(axis=1))) ==
                  no_items - 1 - np.squeeze(np.array(train.getnnz(axis=1))))

    # Make sure invariants hold when there are ties
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == 0)

    # Wrong input dimensions
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
Exemplo n.º 4
0
def test_precision_at_k_with_ties():
    """Precision@k must be exactly zero when all predictions are tied.

    A BPR model is fitted, then every parameter is overwritten with
    zeros so that all predictions are zero; the mean precision at
    ``k=10`` on the test split is expected to be ``0.0``.
    """

    no_users, no_items = 10, 100
    train, test = _generate_data(no_users, no_items)

    model = LightFM(loss="bpr")
    model.fit_partial(train)

    # Overwrite every parameter with zeros: all predictions become zero.
    for attr in ("user_embeddings",
                 "item_embeddings",
                 "user_biases",
                 "item_biases"):
        setattr(model, attr, np.zeros_like(getattr(model, attr)))

    precision = evaluation.precision_at_k(model, test, k=10)

    # Ties are resolved pessimistically, so nothing counts as a hit.
    assert precision.mean() == 0.0
Exemplo n.º 5
0
def test_predict_scores(num_threads=2):
    """Exercise ``predict_score`` against ``predict`` and ``predict_rank``.

    Checks, in order:

    * scores of a fully dense query equal ``model.predict`` row by row;
    * results are identical for serial/parallel execution and with or
      without ``precompute_representations``;
    * ranks from ``predict_rank`` equal ranks derived from the scores;
    * passing the dense query as ``train_interactions`` yields all-zero
      scores;
    * with every model parameter zeroed all scores are zero;
    * a query of the wrong shape raises ``ValueError``.

    Parameters
    ----------
    num_threads : int
        Thread count passed to the parallel ``predict_score`` and
        ``predict_rank`` calls.
    """

    no_users, no_items = (10, 100)

    # Seeded so the sparse training matrix is identical on every run.
    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute scores and check if results equal to model.predict
    predict_input = sp.csr_matrix(np.ones((no_users, no_items)))
    scores = model.predict_score(predict_input,
                                 num_threads=num_threads).todense()
    for uid in range(no_users):
        scores_arr = model.predict(np.repeat(uid, no_items),
                                   np.arange(no_items))
        score_slice = np.array(scores)[uid, :]
        assert np.array_equal(score_slice, scores_arr)

    # check if precompute and parallelization work correctly
    scores_serial = model.predict_score(predict_input,
                                        num_threads=1).todense()
    scores_no_prec = model.predict_score(predict_input,
                                         num_threads=num_threads,
                                         precompute_representations=False
                                         ).todense()
    scores_ser_no_prec = model.predict_score(predict_input,
                                             num_threads=1,
                                             precompute_representations=False
                                             ).todense()
    assert np.array_equal(scores, scores_serial)
    assert np.array_equal(scores, scores_no_prec)
    assert np.array_equal(scores, scores_ser_no_prec)

    # Compute ranks and compares with ranks computed from scores
    ranks = model.predict_rank(predict_input,
                               num_threads=num_threads).todense()

    def rank_scores(s):
        # ranks from scores as in http://stackoverflow.com/a/14672797/5251962
        # Only the inverse index is needed; the unique values are discarded.
        _, v = np.unique(s, return_inverse=True)
        return len(s) - 1 - (np.cumsum(np.bincount(v)) - 1)[v]

    check_ranks = np.apply_along_axis(rank_scores, 1, scores)
    assert np.array_equal(ranks, check_ranks)

    # Train set exclusions. All scores should be zero
    # if train interactions is dense.
    scores = model.predict_score(predict_input,
                                 train_interactions=predict_input).todense()
    assert np.all(scores == 0)

    # Make sure invariants hold when there are ties
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    scores = model.predict_score(predict_input,
                                 num_threads=num_threads).todense()

    assert np.all(scores.min(axis=1) == 0)
    assert np.all(scores.max(axis=1) == 0)

    # Wrong input dimensions
    with pytest.raises(ValueError):
        model.predict_score(sp.csr_matrix((5, 5)), num_threads=num_threads)
Exemplo n.º 6
0
def test_predict_scores(num_threads=2):
    """Verify ``predict_score`` for consistency, exclusions, ties and shape.

    The test fits a model on a small random sparse matrix and then
    asserts that:

    * dense-query scores match ``model.predict`` for every user;
    * serial vs. parallel execution and
      ``precompute_representations`` on/off all give identical scores;
    * ``predict_rank`` agrees with ranks recomputed from the scores;
    * using the dense query as ``train_interactions`` zeroes all scores;
    * zeroing every model parameter zeroes all scores;
    * a mis-shaped query raises ``ValueError``.

    Parameters
    ----------
    num_threads : int
        Thread count passed to the parallel ``predict_score`` and
        ``predict_rank`` calls.
    """

    no_users, no_items = (10, 100)

    # Seeded so the sparse training matrix is identical on every run.
    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute scores and check if results equal to model.predict
    predict_input = sp.csr_matrix(np.ones((no_users, no_items)))
    scores = model.predict_score(predict_input,
                                 num_threads=num_threads).todense()
    for uid in range(no_users):
        scores_arr = model.predict(np.repeat(uid, no_items),
                                   np.arange(no_items))
        score_slice = np.array(scores)[uid, :]
        assert np.array_equal(score_slice, scores_arr)

    # check if precompute and parallelization work correctly
    scores_serial = model.predict_score(predict_input, num_threads=1).todense()
    scores_no_prec = model.predict_score(
        predict_input,
        num_threads=num_threads,
        precompute_representations=False).todense()
    scores_ser_no_prec = model.predict_score(
        predict_input, num_threads=1,
        precompute_representations=False).todense()
    assert np.array_equal(scores, scores_serial)
    assert np.array_equal(scores, scores_no_prec)
    assert np.array_equal(scores, scores_ser_no_prec)

    # Compute ranks and compares with ranks computed from scores
    ranks = model.predict_rank(predict_input,
                               num_threads=num_threads).todense()

    def rank_scores(s):
        # ranks from scores as in http://stackoverflow.com/a/14672797/5251962
        # The unique values themselves are not used, only the inverse index.
        _, v = np.unique(s, return_inverse=True)
        return len(s) - 1 - (np.cumsum(np.bincount(v)) - 1)[v]

    check_ranks = np.apply_along_axis(rank_scores, 1, scores)
    assert np.array_equal(ranks, check_ranks)

    # Train set exclusions. All scores should be zero
    # if train interactions is dense.
    scores = model.predict_score(predict_input,
                                 train_interactions=predict_input).todense()
    assert np.all(scores == 0)

    # Make sure invariants hold when there are ties
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    scores = model.predict_score(predict_input,
                                 num_threads=num_threads).todense()

    assert np.all(scores.min(axis=1) == 0)
    assert np.all(scores.max(axis=1) == 0)

    # Wrong input dimensions
    with pytest.raises(ValueError):
        model.predict_score(sp.csr_matrix((5, 5)), num_threads=num_threads)