Example #1
def test_movielens_accuracy_sample_weights_grad_accumulation():

    # Set weights to zero for all odd-numbered users
    # and check that those users have not accumulated
    # any gradient updates.

    weights = train.copy()
    weights.data = np.ones(train.getnnz(),
                           dtype=np.float32)
    even_users = weights.row % 2 == 0
    weights.data *= even_users

    even_idx = np.arange(train.shape[0]) % 2 == 0
    odd_idx = np.arange(train.shape[0]) % 2 != 0

    for loss in ('logistic', 'bpr', 'warp'):
        model = LightFM(loss=loss, random_state=SEED)

        model.fit_partial(train,
                          sample_weight=weights,
                          epochs=1)

        assert np.allclose(model.user_embedding_gradients[odd_idx], 1.0)
        assert np.allclose(model.user_bias_gradients[odd_idx], 1.0)

        assert not np.allclose(model.user_embedding_gradients[even_idx], 1.0)
        assert not np.allclose(model.user_bias_gradients[even_idx], 1.0)
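
The examples on this page are excerpts from LightFM's Movielens test suite and from user projects; most of them assume module-level train, test, and SEED fixtures plus common imports that the excerpts do not show. A minimal sketch of an equivalent preamble is given below; the +1/-1 encoding, the 4.0 rating threshold, and the SEED value are assumptions rather than the library's definitions.

# Imports assumed by the test snippets in this listing.
import numpy as np
import pytest
import scipy.sparse as sp

from sklearn.metrics import roc_auc_score

from lightfm import LightFM, evaluation
from lightfm.datasets import fetch_movielens

# Arbitrary constant; the original suite defines its own value.
SEED = 10


def _binarize(interactions):
    # Assumption: ratings of 4 and above count as positives (+1),
    # everything else as negatives (-1).
    interactions = interactions.copy().tocoo()
    interactions.data = np.where(interactions.data >= 4.0,
                                 1.0, -1.0).astype(np.float32)
    return interactions


_movielens = fetch_movielens()
train = _binarize(_movielens['train'])
test = _binarize(_movielens['test'])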
Example #2
def test_predict_ranks():

    no_users, no_items = (10, 100)

    train = sp.coo_matrix((no_users,
                           no_items),
                          dtype=np.float32)

    model = LightFM()
    model.fit_partial(train)

    # Compute ranks for all items
    rank_input = sp.csr_matrix(np.ones((no_users, no_items)))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    for row in range(no_users):
        assert np.all(np.sort(ranks[row]) == np.arange(no_items))

    # When all predictions are tied, every item should be assigned rank 0
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == 0)

    # Wrong input dimensions
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
Example #3
def test_warp_kos_precision():

    # Remove all negative examples
    training = train.copy()
    training.data[training.data < 1] = 0
    training = training.tocsr()
    training.eliminate_zeros()

    model = LightFM(learning_rate=0.05, k=5,
                    loss='warp-kos')

    model.fit_partial(training,
                      epochs=10)

    train_precision = precision_at_k(model,
                                     training,
                                     10)
    test_precision = precision_at_k(model,
                                    test,
                                    10)

    full_train_auc = full_auc(model, training)
    full_test_auc = full_auc(model, test)

    assert train_precision > 0.44
    assert test_precision > 0.06

    assert full_train_auc > 0.9
    assert full_test_auc > 0.87
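
Example #3 (and several examples below) calls local precision_at_k(model, data, k) and full_auc(model, data) helpers rather than the public lightfm.evaluation functions. They are not part of this listing; the following is a plausible sketch reconstructed from how they are called, not the original implementation.

import numpy as np
from sklearn.metrics import roc_auc_score


def precision_at_k(model, ground_truth, k):
    # Mean fraction of each user's top-k predictions that are known positives.
    ground_truth = ground_truth.tocsr()
    no_users, no_items = ground_truth.shape
    item_ids = np.arange(no_items, dtype=np.int32)
    precisions = []
    for user_id in range(no_users):
        row = ground_truth[user_id]
        positives = set(row.indices[row.data > 0])
        if not positives:
            continue
        scores = model.predict(np.repeat(np.int32(user_id), no_items), item_ids)
        top_k = set(np.argsort(-scores)[:k])
        precisions.append(len(top_k & positives) / float(k))
    return np.mean(precisions)


def full_auc(model, ground_truth):
    # Mean per-user ROC AUC over the full item catalogue.
    ground_truth = ground_truth.tocsr()
    no_users, no_items = ground_truth.shape
    item_ids = np.arange(no_items, dtype=np.int32)
    aucs = []
    for user_id in range(no_users):
        row = ground_truth[user_id]
        positives = row.indices[row.data > 0]
        if not len(positives):
            continue
        predictions = model.predict(np.repeat(np.int32(user_id), no_items), item_ids)
        labels = np.zeros(no_items)
        labels[positives] = 1
        aucs.append(roc_auc_score(labels, predictions))
    return np.mean(aucs)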
Example #4
def test_input_dtypes():

    dtypes = (np.int32,
              np.int64,
              np.float32,
              np.float64)

    no_users, no_items = (10, 100)
    no_features = 20

    for dtype in dtypes:
        train = sp.coo_matrix((no_users,
                               no_items),
                              dtype=dtype)

        user_features = sp.coo_matrix((no_users,
                                       no_features),
                                      dtype=dtype)
        item_features = sp.coo_matrix((no_items,
                                       no_features),
                                      dtype=dtype)

        model = LightFM()
        model.fit_partial(train,
                          user_features=user_features,
                          item_features=item_features)

        model.predict(np.random.randint(0, no_users, 10).astype(np.int32),
                      np.random.randint(0, no_items, 10).astype(np.int32),
                      user_features=user_features,
                      item_features=item_features)
Example #5
def test_warp_kos_precision():

    # Remove all negative examples
    training = train.copy()
    training.data[training.data < 1] = 0
    training = training.tocsr()
    training.eliminate_zeros()

    model = LightFM(learning_rate=0.05, k=5,
                    loss='warp-kos',
                    random_state=SEED)

    model.fit_partial(training,
                      epochs=10)

    (train_precision,
     test_precision,
     full_train_auc,
     full_test_auc) = _get_metrics(model,
                                   train,
                                   test)

    assert train_precision > 0.44
    assert test_precision > 0.06

    assert full_train_auc > 0.9
    assert full_test_auc > 0.87
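
The newer-style examples (this one and many below) call a _get_metrics(model, train_set, test_set) helper that returns precision@10 and AUC for the train and test sets. Below is a sketch of what such a helper could look like using the public lightfm.evaluation functions; the k=10 cut-off is inferred from the older examples, and the negative-interaction filtering is an assumption.

from lightfm import evaluation


def _get_metrics(model, train_set, test_set):
    # Evaluate against positive interactions only.
    train_set = train_set.copy().tocsr()
    test_set = test_set.copy().tocsr()

    train_set.data[train_set.data < 0] = 0.0
    test_set.data[test_set.data < 0] = 0.0
    train_set.eliminate_zeros()
    test_set.eliminate_zeros()

    return (evaluation.precision_at_k(model, train_set, k=10).mean(),
            evaluation.precision_at_k(model, test_set, k=10).mean(),
            evaluation.auc_score(model, train_set).mean(),
            evaluation.auc_score(model, test_set).mean())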
Example #6
def test_movielens_accuracy_sample_weights():
    # Scaling weights down and learning rate up
    # by the same amount should result in
    # roughly the same accuracy

    scale = 1e-01
    weights = train.copy()
    weights.data = np.ones(train.getnnz(),
                           dtype=np.float32) * scale

    for (loss, exp_score) in (('logistic', 0.74),
                              ('bpr', 0.84),
                              ('warp', 0.89)):
        model = LightFM(loss=loss, random_state=SEED)
        model.learning_rate *= 1.0 / scale

        model.fit_partial(train,
                          sample_weight=weights,
                          epochs=10)

        (train_precision,
         test_precision,
         full_train_auc,
         full_test_auc) = _get_metrics(model,
                                       train,
                                       test)

        assert full_train_auc > exp_score
Example #7
def test_feature_inference_fails():

    # At prediction time, if we rely on feature inference and supply
    # ids higher than the number of features that were supplied to fit,
    # we should raise an error.

    no_users, no_items = (10, 100)
    no_features = 20

    train = sp.coo_matrix((no_users,
                           no_items),
                          dtype=np.int32)

    user_features = sp.csr_matrix((no_users,
                                   no_features),
                                  dtype=np.int32)
    item_features = sp.csr_matrix((no_items,
                                   no_features),
                                  dtype=np.int32)
    model = LightFM()
    model.fit_partial(train,
                      user_features=user_features,
                      item_features=item_features)

    with pytest.raises(AssertionError):
        model.predict(np.array([no_features], dtype=np.int32),
                      np.array([no_features], dtype=np.int32))
Example #8
def test_auc_score():

    no_users, no_items = (10, 100)

    train, test = _generate_data(no_users, no_items)

    model = LightFM(loss='bpr')
    model.fit_partial(train)

    auc = evaluation.auc_score(model,
                               test,
                               num_threads=2)
    expected_auc = np.array(_auc(model,
                                 test))

    assert auc.shape == expected_auc.shape
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01
    assert len(auc) == (test.getnnz(axis=1) > 0).sum()
    assert len(evaluation.auc_score(model,
                                    train,
                                    preserve_rows=True)) == test.shape[0]

    # With train interactions excluded
    auc = evaluation.auc_score(model,
                               test,
                               train_interactions=train,
                               num_threads=2)
    expected_auc = np.array(_auc(model,
                                 test,
                                 train))
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01
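
test_auc_score relies on _generate_data and _auc helpers that are likewise not part of the listing. The following is a rough sketch of reference implementations consistent with how they are called; the interaction density and the 50/50 train/test split are assumptions.

import numpy as np
import scipy.sparse as sp

from sklearn.metrics import roc_auc_score


def _generate_data(no_users, no_items, density=0.2):
    # Random binary interactions split evenly into disjoint train and test sets.
    rng = np.random.RandomState(42)
    interactions = sp.rand(no_users, no_items, density=density,
                           format='coo', random_state=rng)
    in_train = rng.rand(interactions.getnnz()) < 0.5
    shape = (no_users, no_items)
    train = sp.coo_matrix((np.ones(in_train.sum()),
                           (interactions.row[in_train], interactions.col[in_train])),
                          shape=shape)
    test = sp.coo_matrix((np.ones((~in_train).sum()),
                          (interactions.row[~in_train], interactions.col[~in_train])),
                         shape=shape)
    return train, test


def _auc(model, ground_truth, train=None):
    # Per-user ROC AUC, skipping users without positives and optionally
    # excluding train positives from the ranked candidate set.
    ground_truth = ground_truth.tocsr()
    train = train.tocsr() if train is not None else None
    no_users, no_items = ground_truth.shape
    item_ids = np.arange(no_items, dtype=np.int32)
    aucs = []
    for user_id in range(no_users):
        positives = ground_truth[user_id].indices
        if not len(positives):
            continue
        predictions = model.predict(np.repeat(np.int32(user_id), no_items), item_ids)
        labels = np.zeros(no_items)
        labels[positives] = 1
        if train is not None:
            keep = np.ones(no_items, dtype=bool)
            keep[train[user_id].indices] = False
            keep[positives] = True
            predictions, labels = predictions[keep], labels[keep]
        aucs.append(roc_auc_score(labels, predictions))
    return aucs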
Example #9
def test_matrix_types():

    mattypes = (sp.coo_matrix, sp.lil_matrix, sp.csr_matrix, sp.csc_matrix)

    dtypes = (np.int32, np.int64, np.float32, np.float64)

    no_users, no_items = (10, 100)
    no_features = 20

    for mattype in mattypes:
        for dtype in dtypes:
            train = mattype((no_users, no_items), dtype=dtype)

            user_features = mattype((no_users, no_features), dtype=dtype)
            item_features = mattype((no_items, no_features), dtype=dtype)

            model = LightFM()
            model.fit_partial(train, user_features=user_features, item_features=item_features)

            model.predict(
                np.random.randint(0, no_users, 10).astype(np.int32),
                np.random.randint(0, no_items, 10).astype(np.int32),
                user_features=user_features,
                item_features=item_features,
            )
Example #10
def test_warp_precision_adadelta_multithreaded():

    model = LightFM(learning_schedule='adadelta',
                    rho=0.95,
                    epsilon=0.000001,
                    loss='warp')

    model.fit_partial(train,
                      epochs=10,
                      num_threads=4)

    train_precision = precision_at_k(model,
                                     train,
                                     10)
    test_precision = precision_at_k(model,
                                    test,
                                    10)

    full_train_auc = full_auc(model, train)
    full_test_auc = full_auc(model, test)

    assert train_precision > 0.45
    assert test_precision > 0.07

    assert full_train_auc > 0.94
    assert full_test_auc > 0.9
Example #11
def test_predict(num_threads=2):

    no_users, no_items = (10, 100)

    train = sp.coo_matrix((no_users,
                           no_items),
                          dtype=np.int32)

    model = LightFM()
    model.fit_partial(train)

    for uid in range(no_users):
        scores_arr = model.predict(np.repeat(uid, no_items),
                                   np.arange(no_items))
        scores_int = model.predict(uid,
                                   np.arange(no_items))
        assert np.allclose(scores_arr, scores_int)
        scores_parallel = model.predict(np.repeat(uid, no_items),
                                        np.arange(no_items),
                                        num_threads=num_threads)
        assert np.allclose(scores_parallel, scores_arr)
        scores_no_prec = model.predict(np.repeat(uid, no_items),
                                       np.arange(no_items),
                                       num_threads=num_threads,
                                       precompute_representations=False)
        assert np.allclose(scores_parallel, scores_no_prec)
        scores_no_prec_serial = model.predict(np.repeat(uid, no_items),
                                              np.arange(no_items),
                                              num_threads=1,
                                              precompute_representations=False)
        assert np.allclose(scores_parallel, scores_no_prec_serial)
Example #12
def test_empty_matrix():

    no_users, no_items = (10, 100)

    train = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    model = LightFM()
    model.fit_partial(train)
Example #13
import pickle
import sys

import numpy as np
import pandas as pd
import scipy.io as spi
import scipy.sparse as sps

from sklearn.preprocessing import MinMaxScaler

from lightfm import LightFM


def fit_lightfm_model():
	""" Fit the lightFM model 
	
	returns d_user_pred, list_user, list_coupon
	list_coupon = list of test coupons 
	list_user = list of user ID 
	d_user_pred : key = user, value = predicted ranking of coupons in list_coupon
	"""

	#Load data
	Mui_train = spi.mmread("../Data/Data_translated/biclass_user_item_train_mtrx.mtx")
	uf        = spi.mmread("../Data/Data_translated/user_feat_mtrx.mtx")
	itrf      = spi.mmread("../Data/Data_translated/train_item_feat_mtrx.mtx")
	itef      = spi.mmread("../Data/Data_translated/test_item_feat_mtrx.mtx")
	
	#Print shapes as a check
	print("user_features shape: %s,\nitem train features shape: %s,\nitem test features shape: %s" % (uf.shape, itrf.shape, itef.shape))
	
	#Load test coupon  and user lists
	cplte       = pd.read_csv("../Data/Data_translated/coupon_list_test_translated.csv")
	ulist       = pd.read_csv("../Data/Data_translated/user_list_translated.csv")
	list_coupon = cplte["COUPON_ID_hash"].values
	list_user   = ulist["USER_ID_hash"].values
	
	#Build model
	no_comp, lr, ep = 10, 0.01, 5
	model = LightFM(no_components=no_comp, learning_rate=lr, loss='warp')
	model.fit_partial(Mui_train, user_features = uf, item_features = itrf, epochs = ep, num_threads = 4, verbose = True)

	test               = sps.csr_matrix((len(list_user), len(list_coupon)), dtype = np.int32)
	no_users, no_items = test.shape
	pid_array          = np.arange(no_items, dtype=np.int32)

	#Create and initialise dict to store predictions
	d_user_pred = {}
	for user in list_user :
		d_user_pred[user] = []
	
	# Loop over users and compute predictions
	for user_id, row in enumerate(test):
		sys.stdout.write("\rProcessing user " + str(user_id)+"/ "+str(len(list_user)))
		sys.stdout.flush()
		uid_array         = np.empty(no_items, dtype=np.int32)
		uid_array.fill(user_id)
		predictions       = model.predict(uid_array, pid_array,user_features = uf, item_features = itef, num_threads=4)
		user              = str(list_user[user_id])
		# apply MinMaxScaler for blending later on
		MMS               = MinMaxScaler()
		pred              = MMS.fit_transform(predictions.reshape(-1, 1)).ravel()
		d_user_pred[user] = pred

	# Pickle the predictions for future_use
	d_pred = {"list_coupon" : list_coupon.tolist(), "d_user_pred" : d_user_pred}
	with open("../Data/Data_translated/d_pred_lightfm.pickle", "wb") as f:
		pickle.dump(d_pred, f, protocol = pickle.HIGHEST_PROTOCOL)

	return d_user_pred, list_user, list_coupon
Example #14
def test_random_state_fixing():

    model = LightFM(learning_rate=0.05, loss='warp', random_state=SEED)

    model.fit_partial(train, epochs=2)

    model_2 = LightFM(learning_rate=0.05, loss='warp', random_state=SEED)

    model_2.fit_partial(train, epochs=2)

    assert np.all(model.user_embeddings == model_2.user_embeddings)
    assert np.all(model.item_embeddings == model_2.item_embeddings)
Example #15
def test_movielens_accuracy_resume():

    model = LightFM(random_state=SEED)

    for _ in range(10):
        model.fit_partial(train, epochs=1)

    train_predictions = model.predict(train.row, train.col)
    test_predictions = model.predict(test.row, test.col)

    assert roc_auc_score(train.data, train_predictions) > 0.84
    assert roc_auc_score(test.data, test_predictions) > 0.76
Example #16
def test_not_enough_features_fails():

    no_users, no_items = (10, 100)
    no_features = 20

    train = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    user_features = sp.csr_matrix((no_users - 1, no_features), dtype=np.int32)
    item_features = sp.csr_matrix((no_items - 1, no_features), dtype=np.int32)
    model = LightFM()
    with pytest.raises(Exception):
        model.fit_partial(train, user_features=user_features, item_features=item_features)
Example #17
def test_predict():

    no_users, no_items = (10, 100)

    train = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    model = LightFM()
    model.fit_partial(train)

    for uid in range(no_users):
        scores_arr = model.predict(np.repeat(uid, no_items), np.arange(no_items))
        scores_int = model.predict(uid, np.arange(no_items))
        assert np.allclose(scores_arr, scores_int)
Example #18
def test_warp_stability():

    learning_rates = (0.05, 0.1, 0.5)

    for lrate in learning_rates:

        model = LightFM(learning_rate=lrate,
                        loss='warp')
        model.fit_partial(train,
                          epochs=10)

        assert not np.isnan(model.user_embeddings).any()
        assert not np.isnan(model.item_embeddings).any()
Example #19
def test_movielens_accuracy():

    model = LightFM()
    model.fit_partial(train,
                      epochs=10)

    train_predictions = model.predict(train.row,
                                      train.col)
    test_predictions = model.predict(test.row,
                                     test.col)

    assert roc_auc_score(train.data, train_predictions) > 0.84
    assert roc_auc_score(test.data, test_predictions) > 0.76
Example #20
def test_overfitting():

    # Let's massively overfit
    model = LightFM(no_components=50, random_state=SEED)
    model.fit_partial(train, epochs=30)

    train_predictions = model.predict(train.row, train.col)
    test_predictions = model.predict(test.row, test.col)
    overfit_train = roc_auc_score(train.data, train_predictions)
    overfit_test = roc_auc_score(test.data, test_predictions)

    assert overfit_train > 0.99
    assert overfit_test < 0.75
Example #21
def test_logistic_precision():

    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    (train_precision, test_precision, full_train_auc,
     full_test_auc) = _get_metrics(model, train, test)

    assert train_precision > 0.3
    assert test_precision > 0.03

    assert full_train_auc > 0.79
    assert full_test_auc > 0.73
Example #22
def test_zeros_negative_accuracy():

    # Should get the same accuracy when zeros are used to
    # denote negative interactions
    train.data[train.data == -1] = 0
    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    train_predictions = model.predict(train.row, train.col)
    test_predictions = model.predict(test.row, test.col)

    assert roc_auc_score(train.data, train_predictions) > 0.84
    assert roc_auc_score(test.data, test_predictions) > 0.76
Example #23
def test_predict_ranks():

    no_users, no_items = (10, 100)

    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute ranks for all items
    rank_input = sp.csr_matrix(np.ones((no_users, no_items)))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    for row in range(no_users):
        assert np.all(np.sort(ranks[row]) == np.arange(no_items))

    # Train set exclusions. All ranks should be zero
    # if train interactions is dense.
    ranks = model.predict_rank(rank_input,
                               train_interactions=rank_input).todense()
    assert np.all(ranks == 0)

    # Max rank should be num_items - 1 - number of positives
    # in train in that row
    ranks = model.predict_rank(rank_input,
                               train_interactions=train).todense()
    assert np.all(np.squeeze(np.array(ranks.max(axis=1))) ==
                  no_items - 1 - np.squeeze(np.array(train.getnnz(axis=1))))

    # Make sure ranks are computed pessimistically when
    # there are ties (that is, equal predictions for every
    # item will assign maximum rank to each).
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 99)
    assert np.all(ranks.max(axis=1) == 99)

    # Wrong input dimensions
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
Example #24
def test_movielens_excessive_regularization():

    # Should perform poorly with high regularization
    model = LightFM(no_components=10,
                    item_alpha=1.0,
                    user_alpha=1.0,
                    random_state=SEED)
    model.fit_partial(train, epochs=10)

    train_predictions = model.predict(train.row, train.col)
    test_predictions = model.predict(test.row, test.col)

    assert roc_auc_score(train.data, train_predictions) < 0.6
    assert roc_auc_score(test.data, test_predictions) < 0.6
Example #25
def test_predict():

    no_users, no_items = (10, 100)

    train = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    model = LightFM()
    model.fit_partial(train)

    for uid in range(no_users):
        scores_arr = model.predict(np.repeat(uid, no_items),
                                   np.arange(no_items))
        scores_int = model.predict(uid, np.arange(no_items))
        assert np.allclose(scores_arr, scores_int)
Example #26
def test_regularization():

    # Let's regularize
    model = LightFM(no_components=50,
                    item_alpha=0.0001,
                    user_alpha=0.0001,
                    random_state=SEED)
    model.fit_partial(train, epochs=30)

    train_predictions = model.predict(train.row, train.col)
    test_predictions = model.predict(test.row, test.col)

    assert roc_auc_score(train.data, train_predictions) > 0.80
    assert roc_auc_score(test.data, test_predictions) > 0.75
Example #27
def test_warp_precision_multithreaded():

    model = LightFM(learning_rate=0.05, loss='warp', random_state=SEED)

    model.fit_partial(train, epochs=10, num_threads=4)

    (train_precision, test_precision, full_train_auc,
     full_test_auc) = _get_metrics(model, train, test)

    assert train_precision > 0.45
    assert test_precision > 0.07

    assert full_train_auc > 0.94
    assert full_test_auc > 0.9
Example #28
def test_not_enough_features_fails():

    no_users, no_items = (10, 100)
    no_features = 20

    train = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    user_features = sp.csr_matrix((no_users - 1, no_features), dtype=np.int32)
    item_features = sp.csr_matrix((no_items - 1, no_features), dtype=np.int32)
    model = LightFM()
    with pytest.raises(Exception):
        model.fit_partial(train,
                          user_features=user_features,
                          item_features=item_features)
Example #29
def test_bpr_precision():

    model = LightFM(learning_rate=0.05, loss='bpr', random_state=SEED)

    model.fit_partial(train, epochs=10)

    (train_precision, test_precision, full_train_auc,
     full_test_auc) = _get_metrics(model, train, test)

    assert train_precision > 0.31
    assert test_precision > 0.04

    assert full_train_auc > 0.86
    assert full_test_auc > 0.84
Example #30
def test_random_state_advanced():
    # Check that using the random state
    # to seed rand_r in Cython advances
    # the random generator state.

    model = LightFM(learning_rate=0.05, loss='warp', random_state=SEED)

    model.fit_partial(train, epochs=1)

    rng_state = model.rng.get_state()[1].copy()

    model.fit_partial(train, epochs=1)

    assert not np.all(rng_state == model.rng.get_state()[1])
Example #31
def test_predict_ranks():

    no_users, no_items = (10, 100)

    train = sp.rand(no_users, no_items, format='csr')

    model = LightFM()
    model.fit_partial(train)

    # Compute ranks for all items
    rank_input = sp.csr_matrix(np.ones((no_users, no_items)))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    for row in range(no_users):
        assert np.all(np.sort(ranks[row]) == np.arange(no_items))

    # Train set exclusions. All ranks should be zero
    # if train interactions is dense.
    ranks = model.predict_rank(rank_input,
                               train_interactions=rank_input).todense()
    assert np.all(ranks == 0)

    # Max rank should be num_items - 1 - number of positives
    # in train in that row
    ranks = model.predict_rank(rank_input,
                               train_interactions=train).todense()
    assert np.all(np.squeeze(np.array(ranks.max(axis=1))) ==
                  no_items - 1 - np.squeeze(np.array(train.getnnz(axis=1))))

    # When all predictions are tied, every item should be assigned rank 0
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == 0)

    # Wrong input dimensions
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
Example #32
def test_training_schedules():

    model = LightFM(no_components=10,
                    learning_schedule='adagrad',
                    random_state=SEED)
    model.fit_partial(train,
                      epochs=0)

    assert (model.item_embedding_gradients == 1).all()
    assert (model.item_embedding_momentum == 0).all()
    assert (model.item_bias_gradients == 1).all()
    assert (model.item_bias_momentum == 0).all()

    assert (model.user_embedding_gradients == 1).all()
    assert (model.user_embedding_momentum == 0).all()
    assert (model.user_bias_gradients == 1).all()
    assert (model.user_bias_momentum == 0).all()

    model.fit_partial(train,
                      epochs=1)

    assert (model.item_embedding_gradients > 1).any()
    assert (model.item_embedding_momentum == 0).all()
    assert (model.item_bias_gradients > 1).any()
    assert (model.item_bias_momentum == 0).all()

    assert (model.user_embedding_gradients > 1).any()
    assert (model.user_embedding_momentum == 0).all()
    assert (model.user_bias_gradients > 1).any()
    assert (model.user_bias_momentum == 0).all()

    model = LightFM(no_components=10,
                    learning_schedule='adadelta',
                    random_state=SEED)
    model.fit_partial(train,
                      epochs=0)

    assert (model.item_embedding_gradients == 0).all()
    assert (model.item_embedding_momentum == 0).all()
    assert (model.item_bias_gradients == 0).all()
    assert (model.item_bias_momentum == 0).all()

    assert (model.user_embedding_gradients == 0).all()
    assert (model.user_embedding_momentum == 0).all()
    assert (model.user_bias_gradients == 0).all()
    assert (model.user_bias_momentum == 0).all()

    model.fit_partial(train,
                      epochs=1)

    assert (model.item_embedding_gradients > 0).any()
    assert (model.item_embedding_momentum > 0).any()
    assert (model.item_bias_gradients > 0).any()
    assert (model.item_bias_momentum > 0).any()

    assert (model.user_embedding_gradients > 0).any()
    assert (model.user_embedding_momentum > 0).any()
    assert (model.user_bias_gradients > 0).any()
    assert (model.user_bias_momentum > 0).any()
Example #33
def test_movielens_accuracy_resume():

    model = LightFM(random_state=SEED)

    for _ in range(10):
        model.fit_partial(train,
                          epochs=1)

    train_predictions = model.predict(train.row,
                                      train.col)
    test_predictions = model.predict(test.row,
                                     test.col)

    assert roc_auc_score(train.data, train_predictions) > 0.84
    assert roc_auc_score(test.data, test_predictions) > 0.76
Example #34
def test_warp_precision_max_sampled():

    model = LightFM(learning_rate=0.05, max_sampled=1, loss='warp')

    # This is equivalent to a no-op pass
    # over the training data
    model.max_sampled = 0

    model.fit_partial(train, epochs=1)

    full_train_auc = full_auc(model, train)
    full_test_auc = full_auc(model, test)

    # The AUC should be no better than random
    assert full_train_auc < 0.55
    assert full_test_auc < 0.55
Example #35
def test_regularization():

    # Let's regularize
    model = LightFM(no_components=50,
                    item_alpha=0.0001,
                    user_alpha=0.0001)
    model.fit_partial(train,
                      epochs=30)

    train_predictions = model.predict(train.row,
                                      train.col)
    test_predictions = model.predict(test.row,
                                     test.col)

    assert roc_auc_score(train.data, train_predictions) > 0.80
    assert roc_auc_score(test.data, test_predictions) > 0.75
Example #36
def test_overfitting():

    # Let's massively overfit
    model = LightFM(no_components=50)
    model.fit_partial(train,
                      epochs=30)

    train_predictions = model.predict(train.row,
                                      train.col)
    test_predictions = model.predict(test.row,
                                     test.col)
    overfit_train = roc_auc_score(train.data, train_predictions)
    overfit_test = roc_auc_score(test.data, test_predictions)

    assert overfit_train > 0.99
    assert overfit_test < 0.75
Example #37
def test_auc_score():

    no_users, no_items = (10, 100)

    train = sp.rand(no_users, no_items, format='coo')
    train.data = np.ones_like(train.data)

    model = LightFM(loss='bpr')
    model.fit_partial(train)

    auc = evaluation.auc_score(model, train,
                               num_threads=2)[train.getnnz(axis=1) > 0]
    expected_auc = np.array(_auc(model, train))

    assert auc.shape == expected_auc.shape
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01
Example #38
def test_zeros_negative_accuracy():

    # Should get the same accuracy when zeros are used to
    # denote negative interactions
    train.data[train.data == -1] = 0
    model = LightFM()
    model.fit_partial(train,
                      epochs=10)

    train_predictions = model.predict(train.row,
                                      train.col)
    test_predictions = model.predict(test.row,
                                     test.col)

    assert roc_auc_score(train.data, train_predictions) > 0.84
    assert roc_auc_score(test.data, test_predictions) > 0.76
Example #39
def test_logistic_precision():

    model = LightFM()
    model.fit_partial(train, epochs=10)

    train_precision = precision_at_k(model, train, 10)
    test_precision = precision_at_k(model, test, 10)

    full_train_auc = full_auc(model, train)
    full_test_auc = full_auc(model, test)

    assert train_precision > 0.3
    assert test_precision > 0.03

    assert full_train_auc > 0.79
    assert full_test_auc > 0.74
Example #40
def test_movielens_excessive_regularization():

    # Should perform poorly with high regularization
    model = LightFM(no_components=10,
                    item_alpha=1.0,
                    user_alpha=1.0)
    model.fit_partial(train,
                      epochs=10)

    train_predictions = model.predict(train.row,
                                      train.col)
    test_predictions = model.predict(test.row,
                                     test.col)

    assert roc_auc_score(train.data, train_predictions) < 0.6
    assert roc_auc_score(test.data, test_predictions) < 0.6
Example #41
def test_warp_precision():

    model = LightFM(learning_rate=0.05, loss='warp')

    model.fit_partial(train, epochs=10)

    train_precision = precision_at_k(model, train, 10)
    test_precision = precision_at_k(model, test, 10)

    full_train_auc = full_auc(model, train)
    full_test_auc = full_auc(model, test)

    assert train_precision > 0.45
    assert test_precision > 0.07

    assert full_train_auc > 0.94
    assert full_test_auc > 0.9
Example #42
def test_bpr_precision_high_interaction_values():

    model = LightFM(learning_rate=0.05, loss='bpr', random_state=SEED)

    _train = train.copy()
    _train.data = _train.data * 5

    model.fit_partial(_train, epochs=10)

    (train_precision, test_precision, full_train_auc,
     full_test_auc) = _get_metrics(model, _train, test)

    assert train_precision > 0.31
    assert test_precision > 0.04

    assert full_train_auc > 0.86
    assert full_test_auc > 0.84
Example #43
def test_precision_at_k():

    no_users, no_items = (10, 100)

    train = sp.rand(no_users, no_items, format='coo')
    train.data = np.ones_like(train.data)

    model = LightFM(loss='bpr')
    model.fit_partial(train)

    k = 10

    mean_precision = evaluation.precision_at_k(
        model, train, k=k)[train.getnnz(axis=1) > 0].mean()
    expected_mean_precision = _precision_at_k(model, train, k)

    assert np.allclose(mean_precision, expected_mean_precision)
Example #44
def test_zero_weights_accuracy():

    # When all weights are zero,
    # accuracy should be no better
    # than random.
    weights = train.copy()
    weights.data = np.zeros(train.getnnz(), dtype=np.float32)

    for loss in ('logistic', 'bpr', 'warp'):
        model = LightFM(loss=loss, random_state=SEED)
        model.fit_partial(train, sample_weight=weights, epochs=10)

        train_predictions = model.predict(train.row, train.col)
        test_predictions = model.predict(test.row, test.col)

        assert 0.45 < roc_auc_score(train.data, train_predictions) < 0.55
        assert 0.45 < roc_auc_score(test.data, test_predictions) < 0.55
Example #45
def test_hogwild_accuracy():

    # Should get comparable accuracy with 2 threads
    model = LightFM()
    model.fit_partial(train,
                      epochs=10,
                      num_threads=2)

    train_predictions = model.predict(train.row,
                                      train.col,
                                      num_threads=2)
    test_predictions = model.predict(test.row,
                                     test.col,
                                     num_threads=2)

    assert roc_auc_score(train.data, train_predictions) > 0.84
    assert roc_auc_score(test.data, test_predictions) > 0.76
Example #46
def test_bpr_precision_multithreaded():

    model = LightFM(learning_rate=0.05, loss='bpr')

    model.fit_partial(train, epochs=10, num_threads=4)

    train_precision = precision_at_k(model, train, 10)
    test_precision = precision_at_k(model, test, 10)

    full_train_auc = full_auc(model, train)
    full_test_auc = full_auc(model, test)

    assert train_precision > 0.31
    assert test_precision > 0.04

    assert full_train_auc > 0.86
    assert full_test_auc > 0.84
Example #47
def test_return_self():

    no_users, no_items = (10, 100)

    train = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    model = LightFM()
    assert model.fit_partial(train) is model
    assert model.fit(train) is model
Example #48
def test_warp_precision_adadelta():

    model = LightFM(learning_schedule='adadelta',
                    rho=0.95,
                    epsilon=0.000001,
                    loss='warp',
                    random_state=SEED)

    model.fit_partial(train, epochs=10, num_threads=1)

    (train_precision, test_precision, full_train_auc,
     full_test_auc) = _get_metrics(model, train, test)

    assert train_precision > 0.45
    assert test_precision > 0.07

    assert full_train_auc > 0.94
    assert full_test_auc > 0.9
Example #49
def test_random_state_fixing():

    model = LightFM(learning_rate=0.05,
                    loss='warp',
                    random_state=SEED)

    model.fit_partial(train,
                      epochs=2)

    model_2 = LightFM(learning_rate=0.05,
                      loss='warp',
                      random_state=SEED)

    model_2.fit_partial(train,
                        epochs=2)

    assert np.all(model.user_embeddings == model_2.user_embeddings)
    assert np.all(model.item_embeddings == model_2.item_embeddings)
Example #50
def test_random_state_advanced():
    # Check that using the random state
    # to seed rand_r in Cython advances
    # the random generator state.

    model = LightFM(learning_rate=0.05,
                    loss='warp',
                    random_state=SEED)

    model.fit_partial(train,
                      epochs=1)

    rng_state = model.random_state.get_state()[1].copy()

    model.fit_partial(train,
                      epochs=1)

    assert not np.all(rng_state == model.random_state.get_state()[1])
Example #51
def test_auc_score():

    no_users, no_items = (10, 100)

    train = sp.rand(no_users, no_items, format='coo')
    train.data = np.ones_like(train.data)

    model = LightFM(loss='bpr')
    model.fit_partial(train)

    auc = evaluation.auc_score(model,
                               train,
                               num_threads=2)[train.getnnz(axis=1) > 0]
    expected_auc = np.array(_auc(model,
                                 train))

    assert auc.shape == expected_auc.shape
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01
Example #52
def test_logistic_precision():

    model = LightFM(random_state=SEED)
    model.fit_partial(train,
                      epochs=10)

    (train_precision,
     test_precision,
     full_train_auc,
     full_test_auc) = _get_metrics(model,
                                   train,
                                   test)

    assert train_precision > 0.3
    assert test_precision > 0.03

    assert full_train_auc > 0.79
    assert full_test_auc > 0.73
Example #53
    def update(self,
               mat_interaction,
               mat_user_feature,
               mat_item_feature,
               epoch=3,
               threads=8):
        model = LightFM(loss=self.loss,
                        learning_rate=self.learning_rate,
                        item_alpha=self.item_alpha,
                        user_alpha=self.user_alpha)
        model.fit_partial(mat_interaction,
                          user_features=mat_user_feature,
                          item_features=mat_item_feature,
                          epochs=epoch,
                          num_threads=threads,
                          verbose=False)

        return model
Example #54
def test_return_self():

    no_users, no_items = (10, 100)

    train = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    model = LightFM()
    assert model.fit_partial(train) is model
    assert model.fit(train) is model
Example #55
class LightFMRecommender(object):
    def __init__(self,
                 n_comp=30,
                 loss='warp-kos',
                 learning='adagrad',
                 alpha=1e-3):
        # Use the constructor arguments rather than hard-coded values
        self.model = LightFM(no_components=n_comp,
                             loss=loss,
                             learning_schedule=learning,
                             user_alpha=alpha,
                             item_alpha=alpha)

    def fit(self, urm, epochs=100):
        self.urm = urm
        self.n_tracks = urm.shape[1]
        for epoch in range(epochs):
            self.model.fit_partial(urm.getCSR(), epochs=1)

    def get_pred_row(self, user_id):
        return self.model.predict(user_id, np.arange(self.n_tracks))

    def s_recommend(self, user_id, nRec=10):
        scores = self.model.predict(user_id, np.arange(self.n_tracks))
        top_items = np.argsort(-scores)

        recommended_items = self._filter_seen(user_id, top_items)
        return recommended_items[0:nRec]

    def _filter_seen(self, user_id, ranking):
        seen = self.urm.extractTracksFromPlaylist(user_id)
        unseen_mask = np.in1d(ranking, seen, assume_unique=True, invert=True)
        return ranking[unseen_mask]

    def m_recommend(self, target_ids, nRec=10):
        results = []
        for tid in target_ids:
            results.append(self.s_recommend(tid, nRec))
        return results
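
A hypothetical usage sketch for the class above. The URM object and its getCSR()/extractTracksFromPlaylist() interface are inferred from the calls in the class; the toy stand-in below exists only so the example runs.

import numpy as np
import scipy.sparse as sp


class _ToyURM(object):
    # Minimal stand-in for the project's user-rating-matrix wrapper.
    def __init__(self, matrix):
        self.matrix = sp.csr_matrix(matrix)
        self.shape = self.matrix.shape

    def getCSR(self):
        return self.matrix

    def extractTracksFromPlaylist(self, user_id):
        return self.matrix[user_id].indices


interactions = sp.rand(20, 50, density=0.2, format='csr', random_state=0)
interactions.data = np.ones_like(interactions.data)

recommender = LightFMRecommender()
recommender.fit(_ToyURM(interactions), epochs=5)
print(recommender.s_recommend(0, nRec=10))
print(recommender.m_recommend([0, 1, 2], nRec=5))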
Example #56
def test_user_supplied_features_accuracy():

    model = LightFM()
    model.fit_partial(train,
                      user_features=train_user_features,
                      item_features=train_item_features,
                      epochs=10)

    train_predictions = model.predict(train.row,
                                      train.col,
                                      user_features=train_user_features,
                                      item_features=train_item_features)
    test_predictions = model.predict(test.row,
                                     test.col,
                                     user_features=test_user_features,
                                     item_features=test_item_features)

    assert roc_auc_score(train.data, train_predictions) > 0.84
    assert roc_auc_score(test.data, test_predictions) > 0.76