def test_to_df_fails_with_mismatched_inputs(sample_dataset):
    """Expect ``to_df`` to raise ``ValueError`` on mismatched input lengths.

    Only half as many recommendation arrays as there are users are built,
    so the two arguments handed to ``to_df`` differ in length.
    """
    users = sample_dataset[:, 0]
    items = sample_dataset[:, 1]

    encoder = DatasetEncoder()
    encoder.fit(users=users, items=items)
    encoded = encoder.transform(users=users, items=items)

    # Deliberately produce too few recommendation rows (half the user count).
    half_count = len(users) // 2
    recommended = []
    for _ in range(half_count):
        picks = np.random.choice(encoded["items"], 10, replace=False)
        recommended.append(picks)

    with pytest.raises(ValueError):
        encoder.to_df(users, recommended)
def test_to_df_succeeds(sample_dataset):
    """Check that ``to_df`` returns decoded users and items, row by row.

    One array of 10 randomly chosen encoded items is built per user. The
    resulting frame's ``"user"`` column must equal the raw users, and the
    remaining columns of each row must equal the inverse-transformed
    recommendations for that row.
    """
    users = sample_dataset[:, 0]
    items = sample_dataset[:, 1]

    encoder = DatasetEncoder()
    encoder.fit(users=users, items=items)
    encoded = encoder.transform(users=users, items=items)

    # One recommendation array per user, drawn without replacement.
    recommended = []
    for _ in users:
        picks = np.random.choice(encoded["items"], 10, replace=False)
        recommended.append(picks)

    frame = encoder.to_df(encoded["users"], recommended, target_col="user")

    decoded_users = frame["user"].values
    assert np.all(decoded_users == users)

    # Everything after the first column holds the recommended items.
    item_rows = frame.values[:, 1:]
    for position, actual in enumerate(item_rows):
        decoded = encoder.inverse_transform(items=recommended[position])
        assert np.all(actual == decoded["items"])
# Fit the encoder on the "user" and "item" columns of `ratings`.
# NOTE(review): `encoder`, `ratings`, `train_df` and `test_df` are created
# earlier in the script, outside this excerpt.
encoder.fit(ratings["user"], ratings["item"])

# Build the train and test datasets with the same fitted encoder, so both
# are encoded consistently. `utils.as_implicit` is passed as the normalizer
# (presumably converts ratings to implicit feedback -- confirm in `utils`).
train_ds = Dataset.from_df(
    train_df, normalize=utils.as_implicit, encoder=encoder
)
test_ds = Dataset.from_df(
    test_df, normalize=utils.as_implicit, encoder=encoder
)

# Configure and train the GMF model on the training dataset.
model = GMFModel(
    fit_params=dict(epochs=10, batch_size=256), n_factors=32, negative_samples=4
)
model.fit(train_ds)

# Keep only the second component of the test set (the items); shuffling is
# disabled -- presumably so the order lines up with the sampled users below
# (TODO confirm against `to_components`).
_, test_items, _ = test_ds.to_components(shuffle=False)

# evaluate
# Sample user/item candidates with `he_sampling` (n_samples=200), request the
# top 10 predictions, and print the mean of each per-user metric.
users, items = he_sampling(test_ds, train_ds, n_samples=200)
recommended = model.predict(test_ds, users=users, items=items, n=10)
print("t-nDCG", score(metrics.truncated_ndcg, test_items, recommended).mean())
# NOTE(review): labelled "HR@k" but computed with `metrics.precision_at_k`.
print("HR@k", score(metrics.precision_at_k, test_items, recommended).mean())

# results
# Re-predict with the first column of `items` dropped (presumably the held-out
# test item -- verify against `he_sampling`), keep the top 3 per user, and
# write the resulting frame to "recs.csv" without the index.
recommended = model.predict(users=users, items=items[:, 1:], n=3)
recommended_df = encoder.to_df(users, recommended)
recommended_df.to_csv("recs.csv", index=False)