Exemplo n.º 1
1
def test_matrix_types():
    """Smoke-test fit and predict across sparse formats and dtypes.

    For every pairing of the four scipy sparse constructors and four numpy
    dtypes listed below, matrices with no stored entries are built and passed
    through ``fit_partial``, ``predict`` and ``predict_rank``. Nothing is
    asserted; the test passes when none of the calls raises.
    """
    no_users, no_items = 10, 100
    no_features = 20

    sparse_formats = (sp.coo_matrix, sp.lil_matrix, sp.csr_matrix, sp.csc_matrix)
    value_dtypes = (np.int32, np.int64, np.float32, np.float64)

    # Same order as a nested loop: formats vary slowest, dtypes fastest.
    combos = [(fmt, dt) for fmt in sparse_formats for dt in value_dtypes]

    for make_matrix, dtype in combos:
        interactions = make_matrix((no_users, no_items), dtype=dtype)
        sample_weights = interactions.tocoo()

        features = {
            "user_features": make_matrix((no_users, no_features), dtype=dtype),
            "item_features": make_matrix((no_items, no_features), dtype=dtype),
        }

        model = LightFM()
        model.fit_partial(interactions, sample_weight=sample_weights, **features)

        # Ten random (user, item) pairs, cast to int32 ids.
        user_ids = np.random.randint(0, no_users, 10).astype(np.int32)
        item_ids = np.random.randint(0, no_items, 10).astype(np.int32)
        model.predict(user_ids, item_ids, **features)

        model.predict_rank(interactions, **features)
Exemplo n.º 2
0
def test_matrix_types():
    """Run fit / predict / predict_rank for each sparse format and dtype.

    This is a pure smoke test: it builds shape-only (entry-free) matrices for
    each format/dtype combination and checks that the model accepts them
    without raising. It contains no assertions.
    """
    no_users, no_items = 10, 100
    no_features = 20

    def exercise(mattype, dtype):
        # One complete round trip through the model for a single combination.
        train = mattype((no_users, no_items), dtype=dtype)
        weights = train.tocoo()

        user_features = mattype((no_users, no_features), dtype=dtype)
        item_features = mattype((no_items, no_features), dtype=dtype)

        model = LightFM()
        model.fit_partial(
            train,
            sample_weight=weights,
            user_features=user_features,
            item_features=item_features,
        )

        random_users = np.random.randint(0, no_users, 10).astype(np.int32)
        random_items = np.random.randint(0, no_items, 10).astype(np.int32)
        model.predict(
            random_users,
            random_items,
            user_features=user_features,
            item_features=item_features,
        )

        model.predict_rank(
            train, user_features=user_features, item_features=item_features
        )

    for mattype in (sp.coo_matrix, sp.lil_matrix, sp.csr_matrix, sp.csc_matrix):
        for dtype in (np.int32, np.int64, np.float32, np.float64):
            exercise(mattype, dtype)
Exemplo n.º 3
0
def test_user_supplied_features_accuracy():
    """Check ROC AUC when explicit user and item feature matrices are passed.

    The model is fitted for 10 epochs with the module-level training feature
    matrices, then scored on the train and test interactions using the
    matching feature matrices. Train AUC must exceed 0.84 and test AUC 0.76.
    """
    train_features = dict(
        user_features=train_user_features,
        item_features=train_item_features,
    )
    test_features = dict(
        user_features=test_user_features,
        item_features=test_item_features,
    )

    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10, **train_features)

    train_scores = model.predict(train.row, train.col, **train_features)
    test_scores = model.predict(test.row, test.col, **test_features)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84
    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.76
Exemplo n.º 4
0
def test_movielens_accuracy():
    """Fit a default model for 10 epochs and check train/test ROC AUC floors.

    Train AUC must exceed 0.84 and test AUC must exceed 0.76.
    """
    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    # Score every observed (row, col) pair of each split.
    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84
    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.76
Exemplo n.º 5
0
def test_movielens_accuracy():
    """Verify baseline ROC AUC on the train and test interaction matrices.

    A seeded model is trained for 10 epochs without extra features; the AUC
    of its scores must be above 0.84 on train and above 0.76 on test.
    """
    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    splits = (train, test)
    floors = (0.84, 0.76)

    # Predict for both splits first, then check each against its floor.
    scores = [model.predict(split.row, split.col) for split in splits]

    for split, split_scores, floor in zip(splits, scores, floors):
        assert roc_auc_score(split.data, split_scores) > floor
Exemplo n.º 6
0
def test_hogwild_accuracy():
    """Check that training and predicting with 2 threads keeps accuracy.

    The AUC floors are the same 0.84 (train) and 0.76 (test) used by the
    other 10-epoch accuracy checks in this file.
    """
    threads = 2

    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10, num_threads=threads)

    train_scores = model.predict(train.row, train.col, num_threads=threads)
    test_scores = model.predict(test.row, test.col, num_threads=threads)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84
    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.76
Exemplo n.º 7
0
def test_hogwild_accuracy():
    """Multi-threaded fit and predict should reach comparable accuracy.

    Both ``fit_partial`` and ``predict`` are called with ``num_threads=2``;
    train AUC must exceed 0.84 and test AUC must exceed 0.76.
    """
    threading = {"num_threads": 2}

    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10, **threading)

    splits = (train, test)
    scores = [model.predict(split.row, split.col, **threading) for split in splits]

    for split, split_scores, floor in zip(splits, scores, (0.84, 0.76)):
        assert roc_auc_score(split.data, split_scores) > floor
Exemplo n.º 8
0
def test_movielens_accuracy_pickle():
    """Check that a fitted model keeps its accuracy after a pickle round trip.

    The model is fitted for 10 epochs, serialised with ``pickle.dumps`` and
    restored with ``pickle.loads``; the restored model must reach train AUC
    above 0.84 and test AUC above 0.76.
    """
    fitted = LightFM(random_state=SEED)
    fitted.fit(train, epochs=10)

    # Round-trip through an in-memory pickle and use only the restored copy.
    payload = pickle.dumps(fitted)
    restored = pickle.loads(payload)

    train_scores = restored.predict(train.row, train.col)
    test_scores = restored.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84
    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.76
Exemplo n.º 9
0
def test_movielens_accuracy_pickle():
    """Pickling and unpickling a fitted model must not degrade its accuracy.

    After ``fit`` for 10 epochs the model is replaced by its unpickled copy,
    and that copy is checked against the AUC floors 0.84 (train) and
    0.76 (test).
    """
    model = LightFM(random_state=SEED)
    model.fit(train, epochs=10)

    serialized = pickle.dumps(model)
    model = pickle.loads(serialized)

    splits = (train, test)
    scores = [model.predict(split.row, split.col) for split in splits]

    for split, split_scores, floor in zip(splits, scores, (0.84, 0.76)):
        assert roc_auc_score(split.data, split_scores) > floor
Exemplo n.º 10
0
def test_regularization():
    """Check accuracy of a 50-component model with light L2-style penalties.

    The model uses ``item_alpha`` and ``user_alpha`` of 0.0001 and is trained
    for 30 epochs. Train AUC must exceed 0.80 and test AUC must exceed 0.75.
    """
    penalty = 0.0001
    model = LightFM(
        no_components=50,
        item_alpha=penalty,
        user_alpha=penalty,
        random_state=SEED,
    )
    model.fit_partial(train, epochs=30)

    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.80
    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.75
Exemplo n.º 11
0
def test_overfitting():
    """Deliberately overfit and confirm the train/test AUC gap.

    A 50-component model without regularisation arguments is trained for
    30 epochs. Train AUC is expected above 0.99 while test AUC is expected to
    stay below 0.75.
    """
    model = LightFM(no_components=50, random_state=SEED)
    model.fit_partial(train, epochs=30)

    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    # Both metrics are computed before either assertion runs.
    train_auc = roc_auc_score(train.data, train_scores)
    test_auc = roc_auc_score(test.data, test_scores)

    assert train_auc > 0.99
    assert test_auc < 0.75
Exemplo n.º 12
0
def test_zeros_negative_accuracy():
    """Check accuracy when negatives are encoded as 0 instead of -1.

    Every -1 in the training labels is replaced by 0 and a model is fitted
    for 10 epochs on the relabelled data. Train AUC (against the relabelled
    training data) must exceed 0.84 and test AUC must exceed 0.76.
    """
    # Relabel a copy: writing into the module-level ``train`` would leak the
    # change into every test that runs after this one.
    zeroed_train = train.copy()
    zeroed_train.data[zeroed_train.data == -1] = 0

    model = LightFM(random_state=SEED)
    model.fit_partial(zeroed_train, epochs=10)

    train_predictions = model.predict(zeroed_train.row, zeroed_train.col)
    test_predictions = model.predict(test.row, test.col)

    assert roc_auc_score(zeroed_train.data, train_predictions) > 0.84
    assert roc_auc_score(test.data, test_predictions) > 0.76
Exemplo n.º 13
0
def test_zeros_negative_accuracy():
    """Zeros used as negative labels should give the same accuracy as -1.

    The training labels are relabelled (-1 becomes 0) on a private copy, a
    model is fitted on that copy for 10 epochs, and the usual AUC floors are
    checked: above 0.84 on the relabelled train data, above 0.76 on test.
    """
    # Do not mutate the shared module-level ``train`` matrix; other tests
    # read its labels and would otherwise depend on test execution order.
    relabelled = train.copy()
    negatives = relabelled.data == -1
    relabelled.data[negatives] = 0

    model = LightFM(random_state=SEED)
    model.fit_partial(relabelled, epochs=10)

    train_predictions = model.predict(relabelled.row, relabelled.col)
    test_predictions = model.predict(test.row, test.col)

    assert roc_auc_score(relabelled.data, train_predictions) > 0.84
    assert roc_auc_score(test.data, test_predictions) > 0.76
Exemplo n.º 14
0
def test_overfitting():
    """Massively overfit the training data and check that test AUC suffers.

    With 50 components and 30 epochs the model must score above 0.99 AUC on
    train and below 0.75 AUC on test.
    """
    model = LightFM(no_components=50, random_state=SEED)
    model.fit_partial(train, epochs=30)

    splits = (train, test)
    scores = [model.predict(split.row, split.col) for split in splits]

    # Compute both AUC values up front, then assert on each.
    overfit_train, overfit_test = [
        roc_auc_score(split.data, split_scores)
        for split, split_scores in zip(splits, scores)
    ]

    assert overfit_train > 0.99
    assert overfit_test < 0.75
Exemplo n.º 15
0
def test_predict():
    """Check that a scalar user id gives the same scores as a repeated array.

    For each of the 10 users, ``predict`` is called once with the user id
    repeated for every item and once with the bare integer id; both score
    vectors must agree under ``np.allclose``.
    """
    no_users, no_items = 10, 100

    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    model = LightFM()
    model.fit_partial(interactions)

    for user_id in range(no_users):
        repeated_ids = np.repeat(user_id, no_items)
        from_array = model.predict(repeated_ids, np.arange(no_items))
        from_scalar = model.predict(user_id, np.arange(no_items))
        assert np.allclose(from_array, from_scalar)
Exemplo n.º 16
0
def test_predict():
    """``predict`` must accept either an id array or a single integer user id.

    A model fitted on a 10 x 100 interaction matrix with no stored entries is
    queried for every user in both forms, and the two results are compared
    with ``np.allclose``.
    """
    shape = (10, 100)
    no_users, no_items = shape

    model = LightFM()
    model.fit_partial(sp.coo_matrix(shape, dtype=np.int32))

    uid = 0
    while uid < no_users:
        scores_arr = model.predict(np.repeat(uid, no_items), np.arange(no_items))
        scores_int = model.predict(uid, np.arange(no_items))
        assert np.allclose(scores_arr, scores_int)
        uid += 1
Exemplo n.º 17
0
def test_predict_not_fitted():
    """Assert that an unfitted model rejects prediction-style calls.

    ``predict``, ``predict_rank``, ``get_user_representations`` and
    ``get_item_representations`` are each expected to raise ``ValueError``
    when invoked on a freshly constructed model.
    """
    model = LightFM()

    # Each entry defers its call so it runs inside its own ``raises`` block.
    attempts = (
        lambda: model.predict(np.arange(10), np.arange(10)),
        lambda: model.predict_rank(1),
        lambda: model.get_user_representations(),
        lambda: model.get_item_representations(),
    )

    for attempt in attempts:
        with pytest.raises(ValueError):
            attempt()
Exemplo n.º 18
0
def test_predict_not_fitted():
    """Calling query methods before fitting must raise ``ValueError``.

    Covers ``predict``, ``predict_rank`` and both representation getters on a
    model that has only been constructed.
    """
    unfitted = LightFM()

    with pytest.raises(ValueError):
        unfitted.predict(np.arange(10), np.arange(10))

    with pytest.raises(ValueError):
        unfitted.predict_rank(1)

    # The two representation getters are bound methods taking no arguments here.
    for getter in (
        unfitted.get_user_representations,
        unfitted.get_item_representations,
    ):
        with pytest.raises(ValueError):
            getter()
Exemplo n.º 19
0
def test_get_representations():
    """Check that exported representations reproduce ``predict`` scores.

    A model is fitted for 10 epochs, then evaluated twice: once without
    feature matrices and once with identity-plus-random sparse feature
    matrices. In both cases the latent dot product plus user and item biases
    must match ``predict`` to within ``atol=1e-6``, and both latent arrays
    must be float32.
    """
    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    num_users, num_items = train.shape

    # Item features are built before user features, as in the paired tuple.
    noisy_item_features = sp.identity(num_items) + sp.random(num_items, num_items)
    noisy_user_features = sp.identity(num_users) + sp.random(num_users, num_users)

    feature_cases = (
        (None, None),
        (noisy_item_features, noisy_user_features),
    )

    for item_feats, user_feats in feature_cases:
        expected = model.predict(
            test.row,
            test.col,
            user_features=user_feats,
            item_features=item_feats,
        )

        item_biases, item_latent = model.get_item_representations(item_feats)
        user_biases, user_latent = model.get_user_representations(user_feats)

        assert item_latent.dtype == np.float32
        assert user_latent.dtype == np.float32

        # Row-wise dot product of the user and item latent vectors.
        dot_products = (user_latent[test.row] * item_latent[test.col]).sum(axis=1)
        rebuilt = dot_products + user_biases[test.row] + item_biases[test.col]

        assert np.allclose(expected, rebuilt, atol=0.000001)
Exemplo n.º 20
0
def test_movielens_genre_accuracy():
    """Check accuracy when items are described by genre features only.

    The item feature matrix is fetched with indicator features disabled and
    genre features enabled, and must have fewer columns than rows. After 10
    epochs, train AUC must exceed 0.75 and test AUC must exceed 0.69.
    """
    movielens = fetch_movielens(indicator_features=False, genre_features=True)
    item_features = movielens["item_features"]

    feature_shape = item_features.shape
    assert feature_shape[1] < feature_shape[0]

    model = LightFM(random_state=SEED)
    model.fit_partial(train, item_features=item_features, epochs=10)

    train_scores = model.predict(train.row, train.col, item_features=item_features)
    test_scores = model.predict(test.row, test.col, item_features=item_features)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.75
    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.69
Exemplo n.º 21
0
def test_get_representations():
    """Reconstruct predictions from the user and item representations.

    For both the no-features case and a case with identity-plus-random sparse
    features, the test asserts that the latent arrays are float32 and that
    ``sum(user_latent * item_latent) + user_bias + item_bias`` equals the
    output of ``predict`` within an absolute tolerance of 1e-6.
    """
    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    num_users, num_items = train.shape
    rows, cols = test.row, test.col

    cases = (
        (None, None),
        (
            (sp.identity(num_items) + sp.random(num_items, num_items)),
            (sp.identity(num_users) + sp.random(num_users, num_users)),
        ),
    )

    for case in cases:
        item_features, user_features = case

        reference = model.predict(
            rows, cols, user_features=user_features, item_features=item_features
        )

        item_repr = model.get_item_representations(item_features)
        user_repr = model.get_user_representations(user_features)
        item_biases, item_latent = item_repr
        user_biases, user_latent = user_repr

        for latent in (item_latent, user_latent):
            assert latent.dtype == np.float32

        manual = (
            (user_latent[rows] * item_latent[cols]).sum(axis=1)
            + user_biases[rows]
            + item_biases[cols]
        )

        assert np.allclose(reference, manual, atol=0.000001)
Exemplo n.º 22
0
def test_zero_weights_accuracy():
    """Check that all-zero sample weights leave the model at chance level.

    For each of the logistic, BPR and WARP losses a model is fitted for 10
    epochs with a ``sample_weight`` matrix whose stored values are all zero.
    Train and test ROC AUC must both lie strictly between 0.45 and 0.55.
    """
    # Same sparsity pattern as ``train``, but every stored weight is 0.
    zero_weights = train.copy()
    zero_weights.data = np.zeros(train.getnnz(), dtype=np.float32)

    for loss_name in ("logistic", "bpr", "warp"):
        model = LightFM(loss=loss_name, random_state=SEED)
        model.fit_partial(train, sample_weight=zero_weights, epochs=10)

        train_scores = model.predict(train.row, train.col)
        test_scores = model.predict(test.row, test.col)

        train_auc = roc_auc_score(train.data, train_scores)
        assert 0.45 < train_auc < 0.55
        test_auc = roc_auc_score(test.data, test_scores)
        assert 0.45 < test_auc < 0.55
Exemplo n.º 23
0
def test_zero_weights_accuracy():
    """With zero sample weights, accuracy should be no better than random.

    The weight matrix is a copy of ``train`` whose data array is replaced by
    float32 zeros. The check is repeated for the "logistic", "bpr" and "warp"
    losses, and every AUC must fall inside the open interval (0.45, 0.55).
    """
    weights = train.copy()
    weights.data = np.zeros(train.getnnz(), dtype=np.float32)

    splits = (train, test)

    for loss in ("logistic", "bpr", "warp"):
        model = LightFM(loss=loss, random_state=SEED)
        model.fit_partial(train, sample_weight=weights, epochs=10)

        # Predict for both splits before evaluating either AUC.
        scores = [model.predict(split.row, split.col) for split in splits]

        for split, split_scores in zip(splits, scores):
            assert 0.45 < roc_auc_score(split.data, split_scores) < 0.55
Exemplo n.º 24
0
def test_movielens_both_accuracy():
    """
    Accuracy with both genre metadata and item-specific
    features should be no worse than with just item-specific
    features (though more training may be necessary).

    The model is trained for 15 epochs; train AUC must exceed 0.84
    and test AUC must exceed 0.75.
    """
    movielens = fetch_movielens(indicator_features=True, genre_features=True)
    item_features = movielens["item_features"]

    model = LightFM(random_state=SEED)
    model.fit_partial(train, item_features=item_features, epochs=15)

    train_scores = model.predict(train.row, train.col, item_features=item_features)
    test_scores = model.predict(test.row, test.col, item_features=item_features)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84
    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.75
Exemplo n.º 25
0
def test_movielens_excessive_regularization():
    """Check that very strong regularisation yields poor but finite results.

    For each of the four losses a 10-component model with ``item_alpha`` and
    ``user_alpha`` of 1.0 is trained for 10 epochs on 4 threads. Both train
    and test AUC must stay below 0.65, which also guards against the
    regularisation accumulating until it reaches infinity.
    """
    heavy_penalty = dict(no_components=10, item_alpha=1.0, user_alpha=1.0)

    for loss_name in ("logistic", "warp", "bpr", "warp-kos"):
        model = LightFM(loss=loss_name, random_state=SEED, **heavy_penalty)
        model.fit_partial(train, epochs=10, num_threads=4)

        train_scores = model.predict(train.row, train.col)
        test_scores = model.predict(test.row, test.col)

        train_auc = roc_auc_score(train.data, train_scores)
        assert train_auc < 0.65
        test_auc = roc_auc_score(test.data, test_scores)
        assert test_auc < 0.65
Exemplo n.º 26
0
def test_movielens_excessive_regularization():
    """Models with an alpha of 1.0 should perform poorly for every loss.

    Runs the "logistic", "warp", "bpr" and "warp-kos" losses with heavy user
    and item regularisation and asserts train and test AUC are both under
    0.65. The check exists so that regularisation does not accumulate until
    it reaches infinity.
    """
    ceiling = 0.65
    splits = (train, test)

    for loss in ("logistic", "warp", "bpr", "warp-kos"):
        model = LightFM(
            no_components=10,
            item_alpha=1.0,
            user_alpha=1.0,
            loss=loss,
            random_state=SEED,
        )
        model.fit_partial(train, epochs=10, num_threads=4)

        # Both prediction calls happen before any assertion is evaluated.
        scores = [model.predict(split.row, split.col) for split in splits]

        for split, split_scores in zip(splits, scores):
            assert roc_auc_score(split.data, split_scores) < ceiling
Exemplo n.º 27
0
def test_feature_inference_fails():
    """Predicting with out-of-range ids and no feature matrices must fail.

    The model is fitted with explicit user and item feature matrices that
    have ``no_features`` columns. ``predict`` is then called without feature
    matrices using the id ``no_features`` for both user and item, which is
    expected to raise ``ValueError``.
    """
    no_users, no_items = 10, 100
    no_features = 20

    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)
    features = {
        "user_features": sp.csr_matrix((no_users, no_features), dtype=np.int32),
        "item_features": sp.csr_matrix((no_items, no_features), dtype=np.int32),
    }

    model = LightFM()
    model.fit_partial(interactions, **features)

    with pytest.raises(ValueError):
        # No feature matrices are passed here, so the ids index features directly.
        model.predict(
            np.array([no_features], dtype=np.int32),
            np.array([no_features], dtype=np.int32),
        )
Exemplo n.º 28
0
def test_feature_inference_fails():
    """Ids at or beyond the fitted feature count must be rejected by predict.

    After fitting with 20-column feature matrices, a ``predict`` call that
    relies on feature inference (no feature matrices supplied) and uses id 20
    for both the user and the item should raise ``ValueError``.
    """
    no_users, no_items, no_features = 10, 100, 20

    train = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    def empty_features(num_rows):
        # Shape-only CSR matrix with ``no_features`` columns and no stored values.
        return sp.csr_matrix((num_rows, no_features), dtype=np.int32)

    user_features = empty_features(no_users)
    item_features = empty_features(no_items)

    model = LightFM()
    model.fit_partial(
        train, user_features=user_features, item_features=item_features
    )

    with pytest.raises(ValueError):
        model.predict(
            np.array([no_features], dtype=np.int32),
            np.array([no_features], dtype=np.int32),
        )
Exemplo n.º 29
0
def test_input_dtypes():
    """Smoke-test fit and predict with COO inputs of several dtypes.

    For int32, int64, float32 and float64, interaction and feature matrices
    with no stored entries are built, fitted and used for ten random
    predictions. The test has no assertions and passes if nothing raises.
    """
    no_users, no_items = 10, 100
    no_features = 20

    for dtype in (np.int32, np.int64, np.float32, np.float64):
        interactions = sp.coo_matrix((no_users, no_items), dtype=dtype)
        features = {
            "user_features": sp.coo_matrix((no_users, no_features), dtype=dtype),
            "item_features": sp.coo_matrix((no_items, no_features), dtype=dtype),
        }

        model = LightFM()
        model.fit_partial(interactions, **features)

        # Ids are always int32, whatever dtype the matrices use.
        user_ids = np.random.randint(0, no_users, 10).astype(np.int32)
        item_ids = np.random.randint(0, no_items, 10).astype(np.int32)
        model.predict(user_ids, item_ids, **features)
Exemplo n.º 30
0
def test_input_dtypes():
    """Fit and predict must accept integer and floating-point COO matrices.

    Each dtype is exercised once with shape-only matrices; the only success
    criterion is that ``fit_partial`` and ``predict`` complete without
    raising.
    """
    no_users, no_items, no_features = 10, 100, 20

    def run_for(dtype):
        # Build all three inputs in the given dtype and push them through the model.
        train = sp.coo_matrix((no_users, no_items), dtype=dtype)
        user_features = sp.coo_matrix((no_users, no_features), dtype=dtype)
        item_features = sp.coo_matrix((no_items, no_features), dtype=dtype)

        model = LightFM()
        model.fit_partial(
            train, user_features=user_features, item_features=item_features
        )

        model.predict(
            np.random.randint(0, no_users, 10).astype(np.int32),
            np.random.randint(0, no_items, 10).astype(np.int32),
            user_features=user_features,
            item_features=item_features,
        )

    for dtype in (np.int32, np.int64, np.float32, np.float64):
        run_for(dtype)
Exemplo n.º 31
0
def test_overflow_predict():
    """A huge user id passed to ``predict`` must raise, not silently overflow.

    A WARP model is fitted on a random 1000 x 1000 CSR matrix; predicting for
    user id 1231241241231241414 with identity user features is expected to
    raise ``ValueError`` or ``OverflowError``.
    """
    no_users, no_items = 1000, 1000
    huge_user_id = 1231241241231241414

    interactions = sp.rand(no_users, no_items, format="csr", random_state=42)

    model = LightFM(loss="warp")
    model.fit(interactions)

    with pytest.raises((ValueError, OverflowError)):
        scores = model.predict(
            huge_user_id,
            np.arange(no_items),
            user_features=sp.identity(no_users),
        )
        # Only reached if predict did not raise.
        print(scores)
Exemplo n.º 32
0
def test_overflow_predict():
    """Check that an oversized user id is rejected by ``predict``.

    The accepted failure modes are ``ValueError`` and ``OverflowError``. The
    model under test uses the "warp" loss and is fitted on a seeded random
    sparse matrix of shape (1000, 1000).
    """
    shape = (1000, 1000)
    no_users, no_items = shape

    train = sp.rand(*shape, format="csr", random_state=42)

    model = LightFM(loss="warp")
    model.fit(train)

    expected_errors = (ValueError, OverflowError)

    with pytest.raises(expected_errors):
        print(
            model.predict(
                1231241241231241414,
                np.arange(no_items),
                user_features=sp.identity(no_users),
            )
        )
Exemplo n.º 33
-1
def test_user_supplied_features_accuracy():
    """Accuracy floors must hold when feature matrices are supplied explicitly.

    Training uses the train-side user and item feature matrices; scoring of
    each split uses that split's own feature matrices. The floors are 0.84
    AUC on train and 0.76 AUC on test.
    """
    model = LightFM(random_state=SEED)
    model.fit_partial(
        train,
        user_features=train_user_features,
        item_features=train_item_features,
        epochs=10,
    )

    # (interactions, user features, item features, AUC floor) per split.
    cases = (
        (train, train_user_features, train_item_features, 0.84),
        (test, test_user_features, test_item_features, 0.76),
    )

    # Score both splits first so the assertions run after all predictions.
    scores = [
        model.predict(
            split.row,
            split.col,
            user_features=user_feats,
            item_features=item_feats,
        )
        for split, user_feats, item_feats, _ in cases
    ]

    for (split, _, _, floor), split_scores in zip(cases, scores):
        assert roc_auc_score(split.data, split_scores) > floor