def test_predict_ranks():
    """Check ``predict_rank`` on a dense request, with tied scores, and on bad shapes.

    Three things are verified:

    * ranking every item for every user yields, per row, each of the
      values ``0 .. n_cols - 1`` exactly once;
    * after all model parameters are zeroed, every returned rank is 0;
    * an input whose shape does not match the fitted model raises
      ``ValueError``.
    """
    n_rows, n_cols = 10, 100

    # Fit on an interaction matrix that stores no entries at all.
    interactions = sp.coo_matrix((n_rows, n_cols), dtype=np.float32)

    model = LightFM()
    model.fit_partial(interactions)

    # Ask for the rank of every (user, item) pair.
    rank_input = sp.csr_matrix(np.ones((n_rows, n_cols)))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    lowest = ranks.min(axis=1)
    highest = ranks.max(axis=1)
    assert np.all(lowest == 0)
    assert np.all(highest == n_cols - 1)

    # Every row, once sorted, must read 0, 1, ..., n_cols - 1.
    for user_idx in range(n_rows):
        sorted_row = np.sort(ranks[user_idx])
        assert np.all(sorted_row == np.arange(n_cols))

    # The same call must also behave when there are ties: zero out every
    # parameter array and expect rank 0 everywhere.
    for attr in ("user_embeddings", "item_embeddings",
                 "user_biases", "item_biases"):
        setattr(model, attr, np.zeros_like(getattr(model, attr)))

    tied_ranks = model.predict_rank(rank_input, num_threads=2).todense()
    assert np.all(tied_ranks.min(axis=1) == 0)
    assert np.all(tied_ranks.max(axis=1) == 0)

    # A 5x5 request does not match the 10x100 model and must be rejected.
    mismatched = sp.csr_matrix((5, 5))
    with pytest.raises(ValueError):
        model.predict_rank(mismatched, num_threads=2)
def test_predict_ranks():
    """Exercise ``predict_rank``: full ranking, train exclusions, ties, bad shapes.

    Checks, in order:

    * ranking every item for every user gives each row the values
      ``0 .. no_items - 1`` exactly once;
    * passing the dense request itself as ``train_interactions`` leaves
      every rank at 0;
    * passing the sparse training matrix as ``train_interactions`` lowers
      each row's maximum rank by that row's number of stored training
      entries;
    * with all parameters zeroed (all predictions tied) every item gets
      the maximum rank ``no_items - 1``, i.e. ties are ranked
      pessimistically;
    * an input with the wrong shape raises ``ValueError``.
    """
    no_users, no_items = (10, 100)

    # Seeded so the random training interactions are the same on every run.
    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute ranks for all items
    rank_input = sp.csr_matrix(np.ones((no_users, no_items)))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    for row in range(no_users):
        assert np.all(np.sort(ranks[row]) == np.arange(no_items))

    # Train set exclusions. All ranks should be zero
    # if train interactions is dense.
    ranks = model.predict_rank(rank_input,
                               train_interactions=rank_input).todense()
    assert np.all(ranks == 0)

    # Max rank should be num_items - 1 - number of positives
    # in train in that row
    ranks = model.predict_rank(rank_input,
                               train_interactions=train).todense()
    max_rank_per_user = np.squeeze(np.array(ranks.max(axis=1)))
    train_nnz_per_user = np.squeeze(np.array(train.getnnz(axis=1)))
    assert np.all(max_rank_per_user == no_items - 1 - train_nnz_per_user)

    # Make sure ranks are computed pessimistically when
    # there are ties (that is, equal predictions for every
    # item will assign maximum rank to each).
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    # Derived from no_items rather than hard-coded so the check stays
    # correct if the fixture size changes.
    worst_rank = no_items - 1
    assert np.all(ranks.min(axis=1) == worst_rank)
    assert np.all(ranks.max(axis=1) == worst_rank)

    # Wrong input dimensions
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
def test_predict_ranks():
    """Exercise ``predict_rank``: full ranking, train exclusions, ties, bad shapes.

    Checks, in order:

    * ranking every item for every user gives each row the values
      ``0 .. no_items - 1`` exactly once;
    * passing the dense request itself as ``train_interactions`` leaves
      every rank at 0;
    * passing the sparse training matrix as ``train_interactions`` lowers
      each row's maximum rank by that row's number of stored training
      entries;
    * with all parameters zeroed (all predictions tied) every rank is 0;
    * an input with the wrong shape raises ``ValueError``.
    """
    no_users, no_items = (10, 100)

    # Seeded so the random training interactions are the same on every run;
    # the train-exclusion assertion below depends on this matrix.
    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute ranks for all items
    rank_input = sp.csr_matrix(np.ones((no_users, no_items)))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    for row in range(no_users):
        assert np.all(np.sort(ranks[row]) == np.arange(no_items))

    # Train set exclusions. All ranks should be zero
    # if train interactions is dense.
    ranks = model.predict_rank(rank_input,
                               train_interactions=rank_input).todense()
    assert np.all(ranks == 0)

    # Max rank should be num_items - 1 - number of positives
    # in train in that row
    ranks = model.predict_rank(rank_input,
                               train_interactions=train).todense()
    max_rank_per_user = np.squeeze(np.array(ranks.max(axis=1)))
    train_nnz_per_user = np.squeeze(np.array(train.getnnz(axis=1)))
    assert np.all(max_rank_per_user == no_items - 1 - train_nnz_per_user)

    # Make sure invariants hold when there are ties
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == 0)

    # Wrong input dimensions
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
def test_precision_at_k_with_ties():
    """Precision@k must average to zero when every prediction is tied.

    A BPR model is fitted, then all of its parameter arrays are replaced
    with zeros so that all predictions are zero. The mean of
    ``evaluation.precision_at_k`` on the test split is then expected to be
    exactly ``0.0`` (ties are scored pessimistically).
    """
    n_users, n_items = 10, 100
    train_split, test_split = _generate_data(n_users, n_items)

    model = LightFM(loss="bpr")
    model.fit_partial(train_split)

    # Force every prediction to zero by wiping all learned parameters.
    for attr in ("user_embeddings", "item_embeddings",
                 "user_biases", "item_biases"):
        setattr(model, attr, np.zeros_like(getattr(model, attr)))

    cutoff = 10
    per_user_precision = evaluation.precision_at_k(model, test_split, k=cutoff)

    # With everything tied, the pessimistic result is zero precision.
    assert per_user_precision.mean() == 0.0
def test_predict_scores(num_threads=2):
    """Exercise ``predict_score`` against ``predict`` and ``predict_rank``.

    Checks, in order:

    * scores for a dense request equal ``model.predict`` called per user;
    * the result is identical for serial vs. threaded execution and with
      ``precompute_representations`` switched off;
    * ``predict_rank`` equals ranks derived from those scores;
    * passing the dense request as ``train_interactions`` leaves every
      score at 0;
    * with all parameters zeroed every score is 0;
    * an input with the wrong shape raises ``ValueError``.

    Parameters
    ----------
    num_threads : int, optional
        Thread count forwarded to the threaded ``predict_score`` /
        ``predict_rank`` calls. Defaults to 2.
    """
    no_users, no_items = (10, 100)

    # Seeded so the random training interactions are the same on every run.
    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute scores and check if results equal to model.predict
    predict_input = sp.csr_matrix(np.ones((no_users, no_items)))
    scores = model.predict_score(predict_input,
                                 num_threads=num_threads).todense()

    # Both are loop-invariant, so build them once.
    scores_array = np.array(scores)
    item_ids = np.arange(no_items)
    for uid in range(no_users):
        scores_arr = model.predict(np.repeat(uid, no_items), item_ids)
        score_slice = scores_array[uid, :]
        assert np.array_equal(score_slice, scores_arr)

    # check if precompute and parallelization work correctly
    scores_serial = model.predict_score(predict_input,
                                        num_threads=1).todense()
    scores_no_prec = model.predict_score(
        predict_input,
        num_threads=num_threads,
        precompute_representations=False,
    ).todense()
    scores_ser_no_prec = model.predict_score(
        predict_input,
        num_threads=1,
        precompute_representations=False,
    ).todense()
    assert np.array_equal(scores, scores_serial)
    assert np.array_equal(scores, scores_no_prec)
    assert np.array_equal(scores, scores_ser_no_prec)

    # Compute ranks and compares with ranks computed from scores
    ranks = model.predict_rank(predict_input,
                               num_threads=num_threads).todense()

    def rank_scores(s):
        # ranks from scores as in http://stackoverflow.com/a/14672797/5251962
        # inverse[i] is the index of s[i] among the sorted distinct values;
        # cumsum(bincount(inverse)) is then the count of elements <= each
        # distinct value, so the result is the number of elements strictly
        # greater than s[i].
        _, inverse = np.unique(s, return_inverse=True)
        return len(s) - 1 - (np.cumsum(np.bincount(inverse)) - 1)[inverse]

    check_ranks = np.apply_along_axis(rank_scores, 1, scores)
    assert np.array_equal(ranks, check_ranks)

    # Train set exclusions. All scores should be zero
    # if train interactions is dense.
    scores = model.predict_score(predict_input,
                                 train_interactions=predict_input).todense()
    assert np.all(scores == 0)

    # Make sure invariants hold when there are ties
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    scores = model.predict_score(predict_input,
                                 num_threads=num_threads).todense()

    assert np.all(scores.min(axis=1) == 0)
    assert np.all(scores.max(axis=1) == 0)

    # Wrong input dimensions
    with pytest.raises(ValueError):
        model.predict_score(sp.csr_matrix((5, 5)), num_threads=num_threads)
def test_predict_scores(num_threads=2):
    """Exercise ``predict_score`` against ``predict`` and ``predict_rank``.

    Checks, in order:

    * scores for a dense request equal ``model.predict`` called per user;
    * the result is identical for serial vs. threaded execution and with
      ``precompute_representations`` switched off;
    * ``predict_rank`` equals ranks derived from those scores;
    * passing the dense request as ``train_interactions`` leaves every
      score at 0;
    * with all parameters zeroed every score is 0;
    * an input with the wrong shape raises ``ValueError``.

    Parameters
    ----------
    num_threads : int, optional
        Thread count forwarded to the threaded ``predict_score`` /
        ``predict_rank`` calls. Defaults to 2.
    """
    no_users, no_items = (10, 100)

    # Seeded so the random training interactions are the same on every run.
    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute scores and check if results equal to model.predict
    predict_input = sp.csr_matrix(np.ones((no_users, no_items)))
    scores = model.predict_score(predict_input,
                                 num_threads=num_threads).todense()

    # Both are loop-invariant, so build them once.
    scores_array = np.array(scores)
    item_ids = np.arange(no_items)
    for uid in range(no_users):
        scores_arr = model.predict(np.repeat(uid, no_items), item_ids)
        score_slice = scores_array[uid, :]
        assert np.array_equal(score_slice, scores_arr)

    # check if precompute and parallelization work correctly
    scores_serial = model.predict_score(predict_input,
                                        num_threads=1).todense()
    scores_no_prec = model.predict_score(
        predict_input, num_threads=num_threads,
        precompute_representations=False).todense()
    scores_ser_no_prec = model.predict_score(
        predict_input, num_threads=1,
        precompute_representations=False).todense()
    assert np.array_equal(scores, scores_serial)
    assert np.array_equal(scores, scores_no_prec)
    assert np.array_equal(scores, scores_ser_no_prec)

    # Compute ranks and compares with ranks computed from scores
    ranks = model.predict_rank(predict_input,
                               num_threads=num_threads).todense()

    def rank_scores(s):
        # ranks from scores as in http://stackoverflow.com/a/14672797/5251962
        # inverse[i] is the index of s[i] among the sorted distinct values;
        # cumsum(bincount(inverse)) is then the count of elements <= each
        # distinct value, so the result is the number of elements strictly
        # greater than s[i].
        _, inverse = np.unique(s, return_inverse=True)
        return len(s) - 1 - (np.cumsum(np.bincount(inverse)) - 1)[inverse]

    check_ranks = np.apply_along_axis(rank_scores, 1, scores)
    assert np.array_equal(ranks, check_ranks)

    # Train set exclusions. All scores should be zero
    # if train interactions is dense.
    scores = model.predict_score(predict_input,
                                 train_interactions=predict_input).todense()
    assert np.all(scores == 0)

    # Make sure invariants hold when there are ties
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    scores = model.predict_score(predict_input,
                                 num_threads=num_threads).todense()

    assert np.all(scores.min(axis=1) == 0)
    assert np.all(scores.max(axis=1) == 0)

    # Wrong input dimensions
    with pytest.raises(ValueError):
        model.predict_score(sp.csr_matrix((5, 5)), num_threads=num_threads)