示例#1
1
def test_matrix_types():
    """Smoke-test fitting and predicting across sparse formats and dtypes.

    Every pairing of scipy sparse container and numeric dtype is used to build
    the interactions, sample weights and feature matrices. The test passes
    when none of the model calls raises.
    """
    n_users, n_items, n_features = 10, 100, 20

    sparse_formats = (sp.coo_matrix, sp.lil_matrix, sp.csr_matrix, sp.csc_matrix)
    value_types = (np.int32, np.int64, np.float32, np.float64)

    for make_matrix in sparse_formats:
        for value_type in value_types:
            interactions = make_matrix((n_users, n_items), dtype=value_type)
            sample_weight = interactions.tocoo()

            # The same feature matrices are passed to every model call below.
            features = {
                "user_features": make_matrix((n_users, n_features), dtype=value_type),
                "item_features": make_matrix((n_items, n_features), dtype=value_type),
            }

            model = LightFM()
            model.fit_partial(interactions, sample_weight=sample_weight, **features)

            user_ids = np.random.randint(0, n_users, 10).astype(np.int32)
            item_ids = np.random.randint(0, n_items, 10).astype(np.int32)
            model.predict(user_ids, item_ids, **features)

            model.predict_rank(interactions, **features)
示例#2
0
def test_recall_at_k():
    """Check ``evaluation.recall_at_k`` against the ``_recall_at_k`` reference.

    The comparison is made for several cut-offs, both with and without the
    training interactions being passed as ``train_interactions``.
    """
    n_users, n_items = 10, 100
    train, test = _generate_data(n_users, n_items)

    # The model is fitted on the test interactions themselves.
    model = LightFM(loss="bpr")
    model.fit_partial(test)

    for cutoff in (10, 5, 1):
        # Plain evaluation, no train interactions supplied.
        observed = evaluation.recall_at_k(model, test, k=cutoff)
        reference_mean = _recall_at_k(model, test, cutoff)

        assert np.allclose(observed.mean(), reference_mean)

        # By default one score is expected per user with test interactions.
        users_with_test_items = (test.getnnz(axis=1) > 0).sum()
        assert len(observed) == users_with_test_items

        # With preserve_rows=True one score is expected per matrix row.
        all_rows = evaluation.recall_at_k(model, train, preserve_rows=True)
        assert len(all_rows) == test.shape[0]

        # Same comparison with the train interactions supplied.
        observed = evaluation.recall_at_k(
            model, test, k=cutoff, train_interactions=train
        )
        reference_mean = _recall_at_k(model, test, cutoff, train=train)

        assert np.allclose(observed.mean(), reference_mean)
示例#3
0
def test_precision_at_k():
    """Check ``evaluation.precision_at_k`` against ``_precision_at_k``.

    The comparison is made for several cut-offs, both with and without the
    training interactions being passed as ``train_interactions``.
    """
    n_users, n_items = 10, 100
    train, test = _generate_data(n_users, n_items)

    model = LightFM(loss="bpr")

    # Fit on the test set: a high precision is wanted to catch the k=1 case.
    model.fit_partial(test)

    for cutoff in (10, 5, 1):
        # Plain evaluation, no train interactions supplied.
        observed = evaluation.precision_at_k(model, test, k=cutoff)
        reference_mean = _precision_at_k(model, test, cutoff)

        assert np.allclose(observed.mean(), reference_mean)

        # By default one score is expected per user with test interactions.
        users_with_test_items = (test.getnnz(axis=1) > 0).sum()
        assert len(observed) == users_with_test_items

        # With preserve_rows=True one score is expected per matrix row.
        all_rows = evaluation.precision_at_k(model, train, preserve_rows=True)
        assert len(all_rows) == test.shape[0]

        # Same comparison with the train interactions supplied.
        observed = evaluation.precision_at_k(
            model, test, k=cutoff, train_interactions=train
        )
        reference_mean = _precision_at_k(model, test, cutoff, train=train)

        assert np.allclose(observed.mean(), reference_mean)
示例#4
0
def test_recall_at_k():
    """Check ``evaluation.recall_at_k`` against the ``_recall_at_k`` reference.

    The comparison is made for several cut-offs, both with and without the
    training interactions being passed as ``train_interactions``.
    """
    n_users, n_items = 10, 100
    train, test = _generate_data(n_users, n_items)

    # The model is fitted on the test interactions themselves.
    model = LightFM(loss="bpr")
    model.fit_partial(test)

    for cutoff in (10, 5, 1):
        # Plain evaluation, no train interactions supplied.
        observed = evaluation.recall_at_k(model, test, k=cutoff)
        reference_mean = _recall_at_k(model, test, cutoff)

        assert np.allclose(observed.mean(), reference_mean)

        # By default one score is expected per user with test interactions.
        users_with_test_items = (test.getnnz(axis=1) > 0).sum()
        assert len(observed) == users_with_test_items

        # With preserve_rows=True one score is expected per matrix row.
        all_rows = evaluation.recall_at_k(model, train, preserve_rows=True)
        assert len(all_rows) == test.shape[0]

        # Same comparison with the train interactions supplied.
        observed = evaluation.recall_at_k(
            model,
            test,
            k=cutoff,
            train_interactions=train,
        )
        reference_mean = _recall_at_k(model, test, cutoff, train=train)

        assert np.allclose(observed.mean(), reference_mean)
示例#5
0
def test_auc_score():
    """Compare ``evaluation.auc_score`` with the reference ``_auc`` helper.

    Mean AUC values must agree within 0.01, both with and without the
    training interactions being passed as ``train_interactions``.
    """
    n_users, n_items = 10, 100
    tolerance = 0.01

    train, test = _generate_data(n_users, n_items)

    model = LightFM(loss="bpr")
    model.fit_partial(train)

    # Plain evaluation, no train interactions supplied.
    observed = evaluation.auc_score(model, test, num_threads=2)
    reference = np.array(_auc(model, test))

    assert observed.shape == reference.shape
    assert np.abs(observed.mean() - reference.mean()) < tolerance

    # By default one score is expected per user with test interactions.
    users_with_test_items = (test.getnnz(axis=1) > 0).sum()
    assert len(observed) == users_with_test_items

    # With preserve_rows=True one score is expected per matrix row.
    all_rows = evaluation.auc_score(model, train, preserve_rows=True)
    assert len(all_rows) == test.shape[0]

    # Same comparison with the train interactions supplied.
    observed = evaluation.auc_score(
        model, test, train_interactions=train, num_threads=2
    )
    reference = np.array(_auc(model, test, train))
    assert np.abs(observed.mean() - reference.mean()) < tolerance
示例#6
0
def test_precision_at_k():
    """Check ``evaluation.precision_at_k`` against ``_precision_at_k``.

    The comparison is made for several cut-offs, both with and without the
    training interactions being passed as ``train_interactions``.
    """
    n_users, n_items = 10, 100
    train, test = _generate_data(n_users, n_items)

    model = LightFM(loss="bpr")

    # Fit on the test set: a high precision is wanted to catch the k=1 case.
    model.fit_partial(test)

    for cutoff in (10, 5, 1):
        # Plain evaluation, no train interactions supplied.
        observed = evaluation.precision_at_k(model, test, k=cutoff)
        reference_mean = _precision_at_k(model, test, cutoff)

        assert np.allclose(observed.mean(), reference_mean)

        # By default one score is expected per user with test interactions.
        users_with_test_items = (test.getnnz(axis=1) > 0).sum()
        assert len(observed) == users_with_test_items

        # With preserve_rows=True one score is expected per matrix row.
        all_rows = evaluation.precision_at_k(model, train, preserve_rows=True)
        assert len(all_rows) == test.shape[0]

        # Same comparison with the train interactions supplied.
        observed = evaluation.precision_at_k(
            model,
            test,
            k=cutoff,
            train_interactions=train,
        )
        reference_mean = _precision_at_k(model, test, cutoff, train=train)

        assert np.allclose(observed.mean(), reference_mean)
示例#7
0
def test_user_supplied_features_accuracy():
    """Check ROC AUC when explicit user and item feature matrices are supplied.

    The model is trained with the train-side feature matrices and scored on
    the test interactions with the test-side ones.
    """
    train_features = {
        "user_features": train_user_features,
        "item_features": train_item_features,
    }
    test_features = {
        "user_features": test_user_features,
        "item_features": test_item_features,
    }

    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10, **train_features)

    train_scores = model.predict(train.row, train.col, **train_features)
    test_scores = model.predict(test.row, test.col, **test_features)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84

    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.76
示例#8
0
def test_get_representations():
    """Check that exported representations reproduce ``predict`` scores.

    The dot product of the user and item latent vectors plus both biases is
    compared with the model's own predictions, first without feature matrices
    and then with identity-plus-random feature matrices.
    """
    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    n_users, n_items = train.shape

    # Item features are built before user features, matching draw order.
    noisy_item_features = sp.identity(n_items) + sp.random(n_items, n_items)
    noisy_user_features = sp.identity(n_users) + sp.random(n_users, n_users)

    feature_cases = (
        (None, None),
        (noisy_item_features, noisy_user_features),
    )

    for item_features, user_features in feature_cases:
        expected = model.predict(
            test.row,
            test.col,
            user_features=user_features,
            item_features=item_features,
        )

        item_biases, item_latent = model.get_item_representations(item_features)
        user_biases, user_latent = model.get_user_representations(user_features)

        assert item_latent.dtype == np.float32
        assert user_latent.dtype == np.float32

        # Rebuild each score as <user, item> + user bias + item bias.
        dot_products = (user_latent[test.row] * item_latent[test.col]).sum(axis=1)
        rebuilt = dot_products + user_biases[test.row] + item_biases[test.col]

        assert np.allclose(expected, rebuilt, atol=1e-6)
def test_matrix_types():
    """Smoke-test fitting and predicting across sparse formats and dtypes.

    Every pairing of scipy sparse container and numeric dtype is used to build
    the interactions, sample weights and feature matrices. The test passes
    when none of the model calls raises.
    """
    n_users, n_items, n_features = 10, 100, 20

    sparse_formats = (sp.coo_matrix, sp.lil_matrix, sp.csr_matrix, sp.csc_matrix)
    value_types = (np.int32, np.int64, np.float32, np.float64)

    for make_matrix in sparse_formats:
        for value_type in value_types:
            interactions = make_matrix((n_users, n_items), dtype=value_type)
            sample_weight = interactions.tocoo()

            # The same feature matrices are passed to every model call below.
            features = {
                "user_features": make_matrix((n_users, n_features), dtype=value_type),
                "item_features": make_matrix((n_items, n_features), dtype=value_type),
            }

            model = LightFM()
            model.fit_partial(interactions, sample_weight=sample_weight, **features)

            user_ids = np.random.randint(0, n_users, 10).astype(np.int32)
            item_ids = np.random.randint(0, n_items, 10).astype(np.int32)
            model.predict(user_ids, item_ids, **features)

            model.predict_rank(interactions, **features)
示例#10
0
def test_get_representations():
    """Check that exported representations reproduce ``predict`` scores.

    The dot product of the user and item latent vectors plus both biases is
    compared with the model's own predictions, first without feature matrices
    and then with identity-plus-random feature matrices.
    """
    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    n_users, n_items = train.shape

    # Item features are built before user features, matching draw order.
    noisy_item_features = sp.identity(n_items) + sp.random(n_items, n_items)
    noisy_user_features = sp.identity(n_users) + sp.random(n_users, n_users)

    feature_cases = (
        (None, None),
        (noisy_item_features, noisy_user_features),
    )

    for item_features, user_features in feature_cases:
        expected = model.predict(
            test.row,
            test.col,
            user_features=user_features,
            item_features=item_features,
        )

        item_biases, item_latent = model.get_item_representations(item_features)
        user_biases, user_latent = model.get_user_representations(user_features)

        assert item_latent.dtype == np.float32
        assert user_latent.dtype == np.float32

        # Rebuild each score as <user, item> + user bias + item bias.
        dot_products = (user_latent[test.row] * item_latent[test.col]).sum(axis=1)
        rebuilt = dot_products + user_biases[test.row] + item_biases[test.col]

        assert np.allclose(expected, rebuilt, atol=1e-6)
示例#11
0
def test_empty_matrix():
    """Fitting on an interaction matrix with no stored entries must not raise."""
    shape = (10, 100)
    empty_interactions = sp.coo_matrix(shape, dtype=np.int32)

    LightFM().fit_partial(empty_interactions)
def test_empty_matrix():
    """Fitting on an interaction matrix with no stored entries must not raise."""
    shape = (10, 100)
    empty_interactions = sp.coo_matrix(shape, dtype=np.int32)

    LightFM().fit_partial(empty_interactions)
示例#13
0
def test_intersections_check():
    """Exercise the ``check_intersections`` flag of every evaluation metric.

    Each metric must raise ``ValueError`` when the evaluated and train
    interactions overlap and the flag is set, and must run cleanly when they
    do not overlap or when the flag is off.
    """
    n_users, n_items = 10, 100
    train, test = _generate_data(n_users, n_items)

    model = LightFM(loss="bpr")
    model.fit_partial(train)

    metrics = (
        evaluation.auc_score,
        evaluation.recall_at_k,
        evaluation.precision_at_k,
        evaluation.reciprocal_rank,
    )

    # Overlapping interactions with the check on: an error is expected.
    for metric in metrics:
        with pytest.raises(ValueError):
            metric(model, train, train_interactions=train, check_intersections=True)

    # Train and test have no interactions in common: no error expected.
    for metric in metrics:
        metric(model, test, train_interactions=train, check_intersections=True)

    # Overlapping interactions with the check off: no error expected.
    for metric in metrics:
        metric(model, train, train_interactions=train, check_intersections=False)
示例#14
0
def test_movielens_accuracy():
    """Check train and test ROC AUC of a default model after ten epochs."""
    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84

    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.76
示例#15
0
def test_movielens_accuracy():
    """Check train and test ROC AUC of a default model after ten epochs."""
    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84

    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.76
示例#16
0
def test_warp_stability():
    """WARP training must leave no NaNs in the embeddings at any tested rate."""
    for learning_rate in (0.05, 0.1, 0.5):
        model = LightFM(learning_rate=learning_rate, loss="warp", random_state=SEED)
        model.fit_partial(train, epochs=10)

        for embeddings in (model.user_embeddings, model.item_embeddings):
            assert not np.isnan(embeddings).any()
示例#17
0
def test_hogwild_accuracy():
    """Accuracy with two threads should be comparable to single-threaded."""
    n_threads = 2

    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10, num_threads=n_threads)

    train_scores = model.predict(train.row, train.col, num_threads=n_threads)
    test_scores = model.predict(test.row, test.col, num_threads=n_threads)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84

    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.76
示例#18
0
def test_warp_stability():
    """WARP training must leave no NaNs in the embeddings at any tested rate."""
    for learning_rate in (0.05, 0.1, 0.5):
        model = LightFM(learning_rate=learning_rate, loss="warp", random_state=SEED)
        model.fit_partial(train, epochs=10)

        for embeddings in (model.user_embeddings, model.item_embeddings):
            assert not np.isnan(embeddings).any()
示例#19
0
def test_hogwild_accuracy():
    """Accuracy with two threads should be comparable to single-threaded."""
    n_threads = 2

    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10, num_threads=n_threads)

    train_scores = model.predict(train.row, train.col, num_threads=n_threads)
    test_scores = model.predict(test.row, test.col, num_threads=n_threads)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84

    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.76
示例#20
0
def test_random_state_fixing():
    """Two models built with the same seed must end with equal embeddings."""

    def fit_seeded_model():
        # Build and train one WARP model from the shared seed.
        seeded = LightFM(learning_rate=0.05, loss="warp", random_state=SEED)
        seeded.fit_partial(train, epochs=2)
        return seeded

    first = fit_seeded_model()
    second = fit_seeded_model()

    assert np.all(first.user_embeddings == second.user_embeddings)
    assert np.all(first.item_embeddings == second.item_embeddings)
示例#21
0
def test_random_state_fixing():
    """Two models built with the same seed must end with equal embeddings."""

    def fit_seeded_model():
        # Build and train one WARP model from the shared seed.
        seeded = LightFM(learning_rate=0.05, loss="warp", random_state=SEED)
        seeded.fit_partial(train, epochs=2)
        return seeded

    first = fit_seeded_model()
    second = fit_seeded_model()

    assert np.all(first.user_embeddings == second.user_embeddings)
    assert np.all(first.item_embeddings == second.item_embeddings)
示例#22
0
def test_logistic_precision():
    """Check precision and AUC floors for a default model after ten epochs."""
    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    metrics = _get_metrics(model, train, test)
    train_precision, test_precision, full_train_auc, full_test_auc = metrics

    # Precision floors.
    assert train_precision > 0.3
    assert test_precision > 0.03

    # AUC floors.
    assert full_train_auc > 0.79
    assert full_test_auc > 0.73
示例#23
0
def test_overfitting():
    """A large model trained for many epochs should overfit the training set."""
    # Deliberately overfit: many components and many epochs.
    model = LightFM(no_components=50, random_state=SEED)
    model.fit_partial(train, epochs=30)

    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    test_auc = roc_auc_score(test.data, test_scores)

    # Near-perfect on train, bounded above on test.
    assert train_auc > 0.99
    assert test_auc < 0.75
示例#24
0
def test_zeros_negative_accuracy():
    """Accuracy must hold when negatives are encoded as 0 instead of -1.

    The -1 entries of the training data are replaced by 0 and the usual
    train/test ROC AUC floors are checked.
    """
    # Work on a copy: ``train`` is a module-level fixture that other tests
    # read, so rewriting its data in place would leak into them.
    zeroed_train = train.copy()
    zeroed_train.data[zeroed_train.data == -1] = 0

    model = LightFM(random_state=SEED)
    model.fit_partial(zeroed_train, epochs=10)

    train_predictions = model.predict(zeroed_train.row, zeroed_train.col)
    test_predictions = model.predict(test.row, test.col)

    assert roc_auc_score(zeroed_train.data, train_predictions) > 0.84
    assert roc_auc_score(test.data, test_predictions) > 0.76
示例#25
0
def test_zeros_negative_accuracy():
    """Accuracy must hold when negatives are encoded as 0 instead of -1.

    The -1 entries of the training data are replaced by 0 and the usual
    train/test ROC AUC floors are checked.
    """
    # Work on a copy: ``train`` is a module-level fixture that other tests
    # read, so rewriting its data in place would leak into them.
    zeroed_train = train.copy()
    zeroed_train.data[zeroed_train.data == -1] = 0

    model = LightFM(random_state=SEED)
    model.fit_partial(zeroed_train, epochs=10)

    train_predictions = model.predict(zeroed_train.row, zeroed_train.col)
    test_predictions = model.predict(test.row, test.col)

    assert roc_auc_score(zeroed_train.data, train_predictions) > 0.84
    assert roc_auc_score(test.data, test_predictions) > 0.76
示例#26
0
def test_regularization():
    """Check ROC AUC floors for a large model trained with L2 penalties."""
    # Same size as the overfitting setup, but with item and user penalties.
    penalty = 0.0001
    model = LightFM(
        no_components=50,
        item_alpha=penalty,
        user_alpha=penalty,
        random_state=SEED,
    )
    model.fit_partial(train, epochs=30)

    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.80

    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.75
示例#27
0
def test_overfitting():
    """A large model trained for many epochs should overfit the training set."""
    # Deliberately overfit: many components and many epochs.
    model = LightFM(no_components=50, random_state=SEED)
    model.fit_partial(train, epochs=30)

    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    test_auc = roc_auc_score(test.data, test_scores)

    # Near-perfect on train, bounded above on test.
    assert train_auc > 0.99
    assert test_auc < 0.75
示例#28
0
def test_predict():
    """``predict`` must give equal scores for a scalar and an array user id."""
    n_users, n_items = 10, 100

    model = LightFM()
    model.fit_partial(sp.coo_matrix((n_users, n_items), dtype=np.int32))

    for user_id in range(n_users):
        # The user id repeated once per item versus the bare integer.
        repeated_ids = np.repeat(user_id, n_items)
        from_array = model.predict(repeated_ids, np.arange(n_items))
        from_scalar = model.predict(user_id, np.arange(n_items))

        assert np.allclose(from_array, from_scalar)
示例#29
0
def test_random_state_advanced():
    """Training must advance the model's random generator state.

    The random state is used to seed rand_r in Cython; a second training epoch
    should therefore leave the generator in a different state from the first.
    """
    model = LightFM(learning_rate=0.05, loss="warp", random_state=SEED)
    model.fit_partial(train, epochs=1)

    # Copy the state array so the later comparison is against a snapshot.
    state_before = model.random_state.get_state()[1].copy()

    model.fit_partial(train, epochs=1)
    state_after = model.random_state.get_state()[1]

    assert not np.all(state_before == state_after)
示例#30
0
def test_not_enough_features_fails():
    """Fitting must raise when feature matrices have too few rows."""
    n_users, n_items, n_features = 10, 100, 20

    interactions = sp.coo_matrix((n_users, n_items), dtype=np.int32)

    # Both feature matrices are one row short of the interaction matrix.
    short_user_features = sp.csr_matrix((n_users - 1, n_features), dtype=np.int32)
    short_item_features = sp.csr_matrix((n_items - 1, n_features), dtype=np.int32)

    model = LightFM()
    with pytest.raises(Exception):
        model.fit_partial(
            interactions,
            user_features=short_user_features,
            item_features=short_item_features,
        )
示例#31
0
def test_logistic_precision():
    """Check precision and AUC floors for a default model after ten epochs."""
    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    metrics = _get_metrics(model, train, test)
    train_precision, test_precision, full_train_auc, full_test_auc = metrics

    # Precision floors.
    assert train_precision > 0.3
    assert test_precision > 0.03

    # AUC floors.
    assert full_train_auc > 0.79
    assert full_test_auc > 0.73
示例#32
0
def test_bpr_precision():
    """Check precision and AUC floors for a BPR model after ten epochs."""
    model = LightFM(learning_rate=0.05, loss="bpr", random_state=SEED)
    model.fit_partial(train, epochs=10)

    metrics = _get_metrics(model, train, test)
    train_precision, test_precision, full_train_auc, full_test_auc = metrics

    # Precision floors.
    assert train_precision > 0.45
    assert test_precision > 0.07

    # AUC floors.
    assert full_train_auc > 0.91
    assert full_test_auc > 0.87
示例#33
0
def test_warp_precision_multithreaded():
    """Check precision and AUC floors for WARP trained with four threads."""
    model = LightFM(learning_rate=0.05, loss="warp", random_state=SEED)
    model.fit_partial(train, epochs=10, num_threads=4)

    metrics = _get_metrics(model, train, test)
    train_precision, test_precision, full_train_auc, full_test_auc = metrics

    # Precision floors.
    assert train_precision > 0.45
    assert test_precision > 0.07

    # AUC floors.
    assert full_train_auc > 0.9
    assert full_test_auc > 0.9
示例#34
0
def test_random_state_advanced():
    """Training must advance the model's random generator state.

    The random state is used to seed rand_r in Cython; a second training epoch
    should therefore leave the generator in a different state from the first.
    """
    model = LightFM(learning_rate=0.05, loss="warp", random_state=SEED)
    model.fit_partial(train, epochs=1)

    # Copy the state array so the later comparison is against a snapshot.
    state_before = model.random_state.get_state()[1].copy()

    model.fit_partial(train, epochs=1)
    state_after = model.random_state.get_state()[1]

    assert not np.all(state_before == state_after)
def test_not_enough_features_fails():
    """Fitting must raise when feature matrices have too few rows."""
    n_users, n_items, n_features = 10, 100, 20

    interactions = sp.coo_matrix((n_users, n_items), dtype=np.int32)

    # Both feature matrices are one row short of the interaction matrix.
    short_user_features = sp.csr_matrix((n_users - 1, n_features), dtype=np.int32)
    short_item_features = sp.csr_matrix((n_items - 1, n_features), dtype=np.int32)

    model = LightFM()
    with pytest.raises(Exception):
        model.fit_partial(
            interactions,
            user_features=short_user_features,
            item_features=short_item_features,
        )
示例#36
0
def test_predict():
    """``predict`` must give equal scores for a scalar and an array user id."""
    n_users, n_items = 10, 100

    model = LightFM()
    model.fit_partial(sp.coo_matrix((n_users, n_items), dtype=np.int32))

    for user_id in range(n_users):
        # The user id repeated once per item versus the bare integer.
        repeated_ids = np.repeat(user_id, n_items)
        from_array = model.predict(repeated_ids, np.arange(n_items))
        from_scalar = model.predict(user_id, np.arange(n_items))

        assert np.allclose(from_array, from_scalar)
示例#37
0
def test_warp_precision_multithreaded():
    """Check precision and AUC floors for WARP trained with four threads."""
    model = LightFM(learning_rate=0.05, loss="warp", random_state=SEED)
    model.fit_partial(train, epochs=10, num_threads=4)

    metrics = _get_metrics(model, train, test)
    train_precision, test_precision, full_train_auc, full_test_auc = metrics

    # Precision floors.
    assert train_precision > 0.45
    assert test_precision > 0.07

    # AUC floors.
    assert full_train_auc > 0.9
    assert full_test_auc > 0.9
示例#38
0
def test_bpr_precision():
    """Check precision and AUC floors for a BPR model after ten epochs."""
    model = LightFM(learning_rate=0.05, loss="bpr", random_state=SEED)
    model.fit_partial(train, epochs=10)

    metrics = _get_metrics(model, train, test)
    train_precision, test_precision, full_train_auc, full_test_auc = metrics

    # Precision floors.
    assert train_precision > 0.45
    assert test_precision > 0.07

    # AUC floors.
    assert full_train_auc > 0.91
    assert full_test_auc > 0.87
示例#39
0
def test_movielens_genre_accuracy():
    """Check ROC AUC floors when items are described by genre features only."""
    movielens = fetch_movielens(indicator_features=False, genre_features=True)
    item_features = movielens["item_features"]

    # Expect fewer feature columns than item rows.
    n_item_rows, n_feature_columns = item_features.shape[0], item_features.shape[1]
    assert n_feature_columns < n_item_rows

    model = LightFM(random_state=SEED)
    model.fit_partial(train, item_features=item_features, epochs=10)

    train_scores = model.predict(train.row, train.col, item_features=item_features)
    test_scores = model.predict(test.row, test.col, item_features=item_features)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.75

    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.69
示例#40
0
def test_zero_weights_accuracy():
    """With all-zero sample weights accuracy should be no better than random.

    For every loss, both the train and test ROC AUC are expected to stay
    inside the (0.45, 0.55) band.
    """
    zero_weights = train.copy()
    zero_weights.data = np.zeros(train.getnnz(), dtype=np.float32)

    lower, upper = 0.45, 0.55

    for loss in ("logistic", "bpr", "warp"):
        model = LightFM(loss=loss, random_state=SEED)
        model.fit_partial(train, sample_weight=zero_weights, epochs=10)

        train_scores = model.predict(train.row, train.col)
        test_scores = model.predict(test.row, test.col)

        train_auc = roc_auc_score(train.data, train_scores)
        assert lower < train_auc < upper

        test_auc = roc_auc_score(test.data, test_scores)
        assert lower < test_auc < upper
示例#41
0
def test_warp_precision_max_sampled():
    """With ``max_sampled`` forced to zero, WARP should learn nothing useful."""
    model = LightFM(learning_rate=0.05, max_sampled=1, loss="warp", random_state=SEED)

    # Overriding the attribute after construction makes the epoch
    # equivalent to a no-op pass over the training data.
    model.max_sampled = 0

    model.fit_partial(train, epochs=1)

    # Only the AUC values are checked; the precision values are unused.
    _, _, full_train_auc, full_test_auc = _get_metrics(model, train, test)

    # The AUC should be no better than random.
    random_ceiling = 0.55
    assert full_train_auc < random_ceiling
    assert full_test_auc < random_ceiling
示例#42
0
def test_bpr_precision_high_interaction_values():
    """Check BPR metric floors when interaction values are scaled by five."""
    model = LightFM(learning_rate=0.05, loss="bpr", random_state=SEED)

    # Scale a copy so the shared training matrix is left untouched.
    scaled_train = train.copy()
    scaled_train.data = scaled_train.data * 5

    model.fit_partial(scaled_train, epochs=10)

    metrics = _get_metrics(model, scaled_train, test)
    train_precision, test_precision, full_train_auc, full_test_auc = metrics

    # Precision floors.
    assert train_precision > 0.31
    assert test_precision > 0.04

    # AUC floors.
    assert full_train_auc > 0.86
    assert full_test_auc > 0.84
示例#43
0
def test_zero_weights_accuracy():
    """With all-zero sample weights accuracy should be no better than random.

    For every loss, both the train and test ROC AUC are expected to stay
    inside the (0.45, 0.55) band.
    """
    zero_weights = train.copy()
    zero_weights.data = np.zeros(train.getnnz(), dtype=np.float32)

    lower, upper = 0.45, 0.55

    for loss in ("logistic", "bpr", "warp"):
        model = LightFM(loss=loss, random_state=SEED)
        model.fit_partial(train, sample_weight=zero_weights, epochs=10)

        train_scores = model.predict(train.row, train.col)
        test_scores = model.predict(test.row, test.col)

        train_auc = roc_auc_score(train.data, train_scores)
        assert lower < train_auc < upper

        test_auc = roc_auc_score(test.data, test_scores)
        assert lower < test_auc < upper
示例#44
0
def test_training_schedules():
    """Check the optimiser state expected before and after one epoch.

    For ``adagrad`` the gradient accumulators are expected to start at one and
    grow, while the momentum arrays stay at zero. For ``adadelta`` both are
    expected to start at zero and both to gain positive entries.
    """
    parameter_groups = ("item_embedding", "item_bias", "user_embedding", "user_bias")

    def state_of(fitted_model, group):
        # Fetch the (gradients, momentum) arrays kept for one parameter group.
        return (
            getattr(fitted_model, group + "_gradients"),
            getattr(fitted_model, group + "_momentum"),
        )

    # --- adagrad ---
    model = LightFM(no_components=10, learning_schedule="adagrad", random_state=SEED)

    # Zero epochs: presumably only the initial state is set up.
    model.fit_partial(train, epochs=0)
    for group in parameter_groups:
        gradients, momentum = state_of(model, group)
        assert (gradients == 1).all()
        assert (momentum == 0).all()

    model.fit_partial(train, epochs=1)
    for group in parameter_groups:
        gradients, momentum = state_of(model, group)
        assert (gradients > 1).any()
        assert (momentum == 0).all()

    # --- adadelta ---
    model = LightFM(no_components=10, learning_schedule="adadelta", random_state=SEED)

    model.fit_partial(train, epochs=0)
    for group in parameter_groups:
        gradients, momentum = state_of(model, group)
        assert (gradients == 0).all()
        assert (momentum == 0).all()

    model.fit_partial(train, epochs=1)
    for group in parameter_groups:
        gradients, momentum = state_of(model, group)
        assert (gradients > 0).any()
        assert (momentum > 0).any()
def test_return_self():
    """Both ``fit_partial`` and ``fit`` must return the model itself."""
    interactions = sp.coo_matrix((10, 100), dtype=np.int32)

    model = LightFM()
    for fit_method in (model.fit_partial, model.fit):
        assert fit_method(interactions) is model
示例#46
0
def test_return_self():
    """Both ``fit_partial`` and ``fit`` must return the model itself."""
    interactions = sp.coo_matrix((10, 100), dtype=np.int32)

    model = LightFM()
    for fit_method in (model.fit_partial, model.fit):
        assert fit_method(interactions) is model
示例#47
0
def test_bpr_precision_high_interaction_values():
    """Check BPR metric floors when interaction values are scaled by five."""
    model = LightFM(learning_rate=0.05, loss="bpr", random_state=SEED)

    # Scale a copy so the shared training matrix is left untouched.
    scaled_train = train.copy()
    scaled_train.data = scaled_train.data * 5

    model.fit_partial(scaled_train, epochs=10)

    metrics = _get_metrics(model, scaled_train, test)
    train_precision, test_precision, full_train_auc, full_test_auc = metrics

    # Precision floors.
    assert train_precision > 0.31
    assert test_precision > 0.04

    # AUC floors.
    assert full_train_auc > 0.86
    assert full_test_auc > 0.84
示例#48
0
def test_movielens_both_accuracy():
    """
    Accuracy with both genre metadata and item-specific
    features should be no worse than with just item-specific
    features (though more training may be necessary).
    """
    movielens = fetch_movielens(indicator_features=True, genre_features=True)
    item_features = movielens["item_features"]

    model = LightFM(random_state=SEED)
    model.fit_partial(train, item_features=item_features, epochs=15)

    train_scores = model.predict(train.row, train.col, item_features=item_features)
    test_scores = model.predict(test.row, test.col, item_features=item_features)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84

    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.75
示例#49
0
def test_warp_precision_adadelta_multithreaded():
    """Check metric floors for WARP with adadelta trained on four threads."""
    adadelta_settings = {
        "learning_schedule": "adadelta",
        "rho": 0.95,
        "epsilon": 0.000001,
    }
    model = LightFM(loss="warp", random_state=SEED, **adadelta_settings)

    model.fit_partial(train, epochs=10, num_threads=4)

    metrics = _get_metrics(model, train, test)
    train_precision, test_precision, full_train_auc, full_test_auc = metrics

    # Precision floors.
    assert train_precision > 0.45
    assert test_precision > 0.07

    # AUC floors.
    assert full_train_auc > 0.9
    assert full_test_auc > 0.9
示例#50
0
def test_movielens_accuracy_sample_weights():
    """Down-weighted training with a scaled-up learning rate keeps its AUC.

    Every interaction gets the same sample weight ``scale`` while the model's
    learning rate is multiplied by ``1 / scale``. Scaling the weights down and
    the rate up by the same amount should give roughly the same accuracy, so
    the per-loss training AUC floors are expected to hold.
    """
    scale = 0.5
    weights = train.copy()
    weights.data = np.ones(train.getnnz(), dtype=np.float32) * scale

    for loss, exp_score in (("logistic", 0.74), ("bpr", 0.84), ("warp", 0.89)):
        model = LightFM(loss=loss, random_state=SEED)
        # Assign the scaled rate back; a bare expression would be discarded
        # and leave the learning rate unchanged.
        model.learning_rate = model.learning_rate * 1.0 / scale

        model.fit_partial(train, sample_weight=weights, epochs=10)

        # Only the full training AUC is checked here.
        _, _, full_train_auc, _ = _get_metrics(model, train, test)

        assert full_train_auc > exp_score
示例#51
0
def test_movielens_accuracy_sample_weights():
    """Down-weighted training with a scaled-up learning rate keeps its AUC.

    Every interaction gets the same sample weight ``scale`` while the model's
    learning rate is multiplied by ``1 / scale``. Scaling the weights down and
    the rate up by the same amount should give roughly the same accuracy, so
    the per-loss training AUC floors are expected to hold.
    """
    scale = 0.5
    weights = train.copy()
    weights.data = np.ones(train.getnnz(), dtype=np.float32) * scale

    for loss, exp_score in (("logistic", 0.74), ("bpr", 0.84), ("warp", 0.89)):
        model = LightFM(loss=loss, random_state=SEED)
        # Assign the scaled rate back; a bare expression would be discarded
        # and leave the learning rate unchanged.
        model.learning_rate = model.learning_rate * 1.0 / scale

        model.fit_partial(train, sample_weight=weights, epochs=10)

        # Only the full training AUC is checked here.
        _, _, full_train_auc, _ = _get_metrics(model, train, test)

        assert full_train_auc > exp_score
示例#52
0
def test_training_schedules():
    """Check optimiser accumulator state before and after one epoch.

    For both the "adagrad" and "adadelta" learning schedules, the
    ``*_gradients`` and ``*_momentum`` arrays of the item/user embeddings
    and biases are inspected after fitting for zero epochs and again
    after fitting for one epoch.
    """
    parameter_groups = (
        "item_embedding",
        "item_bias",
        "user_embedding",
        "user_bias",
    )

    def accumulators(fitted):
        # Yield (gradients, momentum) array pairs, one per parameter group.
        for group in parameter_groups:
            yield (
                getattr(fitted, group + "_gradients"),
                getattr(fitted, group + "_momentum"),
            )

    # --- adagrad ---
    adagrad = LightFM(no_components=10, learning_schedule="adagrad", random_state=SEED)
    adagrad.fit_partial(train, epochs=0)

    # After zero epochs: gradients are all one, momentum all zero.
    for gradients, momentum in accumulators(adagrad):
        assert (gradients == 1).all()
        assert (momentum == 0).all()

    adagrad.fit_partial(train, epochs=1)

    # After one epoch: some gradients exceed one, momentum is still zero.
    for gradients, momentum in accumulators(adagrad):
        assert (gradients > 1).any()
        assert (momentum == 0).all()

    # --- adadelta ---
    adadelta = LightFM(
        no_components=10, learning_schedule="adadelta", random_state=SEED
    )
    adadelta.fit_partial(train, epochs=0)

    # After zero epochs: both accumulators are all zero.
    for gradients, momentum in accumulators(adadelta):
        assert (gradients == 0).all()
        assert (momentum == 0).all()

    adadelta.fit_partial(train, epochs=1)

    # After one epoch: both accumulators have positive entries.
    for gradients, momentum in accumulators(adadelta):
        assert (gradients > 0).any()
        assert (momentum > 0).any()
示例#53
0
def test_movielens_excessive_regularization():
    """Fit heavily regularised models and check that they score poorly.

    Should perform poorly with high regularization. Check that
    regularization does not accumulate until it reaches infinity.
    """
    losses = ("logistic", "warp", "bpr", "warp-kos")
    max_auc = 0.65

    for loss in losses:
        heavy_regularization = {"item_alpha": 1.0, "user_alpha": 1.0}
        model = LightFM(
            no_components=10,
            loss=loss,
            random_state=SEED,
            **heavy_regularization
        )
        model.fit_partial(train, epochs=10, num_threads=4)

        # Score both splits first, then check each AUC against the ceiling.
        scored = [
            (split.data, model.predict(split.row, split.col))
            for split in (train, test)
        ]
        for labels, predictions in scored:
            assert roc_auc_score(labels, predictions) < max_auc
示例#54
0
def test_feature_inference_fails():
    """Expect ``predict`` to raise ``ValueError`` on out-of-range ids.

    On predict, if we try to use feature inference (no feature matrices
    passed) and supply higher ids than the number of features that were
    supplied to fit, the model should complain.
    """
    no_users = 10
    no_items = 100
    no_features = 20

    # Empty interaction and feature matrices are enough to fix the shapes.
    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)
    user_features = sp.csr_matrix((no_users, no_features), dtype=np.int32)
    item_features = sp.csr_matrix((no_items, no_features), dtype=np.int32)

    model = LightFM()
    model.fit_partial(
        interactions,
        user_features=user_features,
        item_features=item_features,
    )

    # Ids equal to the feature count are one past the last valid index.
    user_ids = np.array([no_features], dtype=np.int32)
    item_ids = np.array([no_features], dtype=np.int32)

    with pytest.raises(ValueError):
        model.predict(user_ids, item_ids)
示例#55
0
def test_movielens_excessive_regularization():
    """Train with very large regularization and expect poor accuracy.

    Should perform poorly with high regularization. Check that
    regularization does not accumulate until it reaches infinity.
    """
    auc_ceiling = 0.65

    for loss_name in ("logistic", "warp", "bpr", "warp-kos"):
        regularized = LightFM(
            no_components=10,
            item_alpha=1.0,
            user_alpha=1.0,
            loss=loss_name,
            random_state=SEED,
        )
        regularized.fit_partial(train, epochs=10, num_threads=4)

        train_scores = regularized.predict(train.row, train.col)
        test_scores = regularized.predict(test.row, test.col)

        # Both splits must stay below the AUC ceiling.
        train_auc = roc_auc_score(train.data, train_scores)
        assert train_auc < auc_ceiling
        test_auc = roc_auc_score(test.data, test_scores)
        assert test_auc < auc_ceiling
示例#56
-1
def test_user_supplied_features_accuracy():
    """Fit with explicit user/item feature matrices and check ROC AUC.

    The model is trained on the train-side feature matrices; predictions
    for each split use that split's own feature matrices and must exceed
    a minimum ROC AUC.
    """
    train_features = {
        "user_features": train_user_features,
        "item_features": train_item_features,
    }
    test_features = {
        "user_features": test_user_features,
        "item_features": test_item_features,
    }

    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10, **train_features)

    train_predictions = model.predict(train.row, train.col, **train_features)
    test_predictions = model.predict(test.row, test.col, **test_features)

    train_auc = roc_auc_score(train.data, train_predictions)
    assert train_auc > 0.84
    test_auc = roc_auc_score(test.data, test_predictions)
    assert test_auc > 0.76