示例#1
0
 def test_recall_at_k(self):
     """Smoke-test ``recall_at_k`` on the fixture's precomputed ranks.

     The metric is invoked once with ``preserve_rows=False`` and once with
     ``preserve_rows=True``; each call must return something other than
     ``None``.
     """
     for keep_rows in (False, True):
         self.assertIsNotNone(
             recall_at_k(predicted_ranks=self.ranks,
                         test_interactions=self.interactions,
                         k=5,
                         preserve_rows=keep_rows))
示例#2
0
 def test_recall_at_k(self):
     """Verify ``recall_at_k`` yields a result for both ``preserve_rows`` modes.

     Only non-``None``-ness of the return value is asserted; the numeric
     content of the result is not inspected here.
     """
     # First pass: preserve_rows disabled.
     without_preserve = recall_at_k(predicted_ranks=self.ranks,
                                    test_interactions=self.interactions,
                                    k=5,
                                    preserve_rows=False)
     self.assertIsNotNone(without_preserve)

     # Second pass: identical inputs, preserve_rows enabled.
     with_preserve = recall_at_k(predicted_ranks=self.ranks,
                                 test_interactions=self.interactions,
                                 k=5,
                                 preserve_rows=True)
     self.assertIsNotNone(with_preserve)
示例#3
0
 def test_recall_at_k(self):
     """Smoke-test the model-based ``recall_at_k`` call signature.

     The metric receives the fixture's model, interactions and both feature
     sets with ``k=5``, once per ``preserve_rows`` value, and must not return
     ``None`` in either case.
     """
     def recall_with(preserve_rows):
         # Everything except ``preserve_rows`` is shared between the two calls.
         return recall_at_k(model=self.model,
                            test_interactions=self.interactions,
                            k=5,
                            user_features=self.user_features,
                            item_features=self.item_features,
                            preserve_rows=preserve_rows)

     self.assertIsNotNone(recall_with(False))
     self.assertIsNotNone(recall_with(True))
示例#4
0
    def test_basic_usage(self):
        """Walk through the basic workflow: build, fit, predict, evaluate.

        A default ``TensorRec`` is fitted on generated dummy data, predictions
        are requested for three user/item pairs, the mean recall@10 is
        printed, and finally the predictions are asserted to be non-``None``.
        """
        # Model with default parameters.
        recommender = TensorRec()

        # Dummy dataset: 100 users, 150 items, interaction density 0.05.
        dummy_data = generate_dummy_data(num_users=100,
                                         num_items=150,
                                         interaction_density=0.05)
        interactions, user_features, item_features = dummy_data

        # Both feature sets are passed to predict and to the metric below.
        feature_kwargs = {
            'user_features': user_features,
            'item_features': item_features,
        }

        # Train for five epochs with verbose output.
        recommender.fit(interactions, user_features, item_features,
                        epochs=5, verbose=True)

        # Scores for user 75 on items 100, 101, and 102.
        scores = recommender.predict(user_ids=[75, 75, 75],
                                     item_ids=[100, 101, 102],
                                     **feature_kwargs)

        # Recall at 10, reported as its mean.
        recall = recall_at_k(recommender, interactions, k=10, **feature_kwargs)
        print(np.mean(recall))

        self.assertIsNotNone(scores)
示例#5
0
    def metric_test(self):
        """Compare the local ranking metrics against TensorRec's eval module.

        A small ``TensorRec`` model is fitted on generated dummy data and used
        to predict ranks. Recall@k and precision@k are then computed twice,
        once with ``eval`` (the benchmark) and once with ``metrics``, and the
        means of the two results are asserted to agree.

        NOTE(review): the method name lacks the ``test`` prefix, so default
        unittest discovery would presumably not collect it -- confirm that
        this is intentional before relying on it in CI.
        """
        k = 10
        latent_factor = 10
        n_users = 10
        n_items = 12

        interactions, user_features, item_features = util.generate_dummy_data_with_indicator(
            num_users=n_users, num_items=n_items, interaction_density=.5)
        print("interactions shape={}".format(np.shape(interactions)))
        print("user features shape={}".format(np.shape(
            user_features.toarray())))
        print("item features shape={}".format(np.shape(
            item_features.toarray())))

        model = TensorRec(n_components=latent_factor)

        model.fit(interactions, user_features, item_features, epochs=19)

        ranks = model.predict_rank(user_features=user_features,
                                   item_features=item_features)

        print("Ranks shape={}".format(np.shape(ranks)))

        # assertEqual (rather than assertTrue(a == b)) reports both shapes
        # when they differ.
        self.assertEqual(np.shape(interactions), np.shape(ranks))

        # Benchmark values from TensorRec's own evaluation helpers.
        tr_recall_result = eval.recall_at_k(predicted_ranks=ranks,
                                            test_interactions=interactions,
                                            k=k,
                                            preserve_rows=False)

        tr_precision_result = eval.precision_at_k(
            predicted_ranks=ranks,
            test_interactions=interactions,
            k=k,
            preserve_rows=False)

        # we need csr for interactions data
        interactions_ = interactions.tocsr()
        recall_result = metrics.recall_at_k(ranks,
                                            interactions_,
                                            k=k,
                                            preserve_rows=False)

        precision_result = metrics.precision_at_k(ranks,
                                                  interactions_,
                                                  k=k,
                                                  preserve_rows=False)

        # The means are floats produced by two separate implementations, so
        # compare with a tolerance instead of exact equality; on failure both
        # values appear in the assertion message.
        self.assertAlmostEqual(tr_recall_result.mean(), recall_result.mean())
        self.assertAlmostEqual(tr_precision_result.mean(),
                               precision_result.mean())
示例#6
0
        movie_name = item_titles[movie]
        movie_position = movie_positions[movie]
        # Uncomment this line to write movie titles to the plot.
        # ax.annotate(movie_name, movie_position[0:2], fontsize='x-small')

    file = '/tmp/tensorrec/movielens/epoch_{}.jpg'.format(epoch)
    plt.savefig(file)

    logging.info("Finished epoch {}".format(epoch))

# Score the model against test_interactions: precision at 5 and recall at 30.
p_at_k = precision_at_k(
    model,
    test_interactions,
    user_features=user_features,
    item_features=item_features,
    k=5,
)
r_at_k = recall_at_k(
    model,
    test_interactions,
    user_features=user_features,
    item_features=item_features,
    k=30,
)

logging.info(
    "Precision@5: {}, Recall@30: {}".format(np.mean(p_at_k), np.mean(r_at_k))
)

# Stitch the saved JPG frames into an MP4 of the model fitting, then remove the JPGs.
fps = 12
# Frames are ordered by the integer found between the first '_' and '.jpg' in each path.
file_list = sorted(
    glob.glob('/tmp/tensorrec/movielens/*.jpg'),
    key=lambda path: int(path.split('_')[1].split('.jpg')[0]),
)
clip = mpy.ImageSequenceClip(file_list, fps=fps)
vid_file = '/tmp/tensorrec/movielens/movielens.mp4'
clip.write_videofile(
    filename=vid_file,
    fps=fps,
    codec='mpeg4',
    preset='veryslow',
    ffmpeg_params=['-qscale:v', '10'],
)
for file in file_list:
    os.remove(file)
示例#7
0
        movie_name = item_titles[movie]
        movie_position = movie_positions[movie]
        # Comment this line to remove movie titles to the plot.
        ax.annotate(movie_name, movie_position[0:2], fontsize='x-small')

    file = '/tmp/tensorrec/movielens/epoch_{}.jpg'.format(epoch)
    plt.savefig(file)

    logging.info("Finished epoch {}".format(epoch))

# Predict ranks from the feature matrices, then evaluate them against
# test_interactions: precision at 5 and recall at 30.
ranks = model.predict_rank(user_features=user_features,
                           item_features=item_features)
p_at_k = precision_at_k(ranks, test_interactions, k=5)
r_at_k = recall_at_k(ranks, test_interactions, k=30)

logging.info(
    "Precision@5: {}, Recall@30: {}".format(np.mean(p_at_k), np.mean(r_at_k))
)

# Stitch the saved JPG frames into an MP4 video of the model fitting.
fps = 12
# Frames are ordered by the integer found between the first '_' and '.jpg' in each path.
file_list = sorted(
    glob.glob('/tmp/tensorrec/movielens/*.jpg'),
    key=lambda path: int(path.split('_')[1].split('.jpg')[0]),
)
clip = mpy.ImageSequenceClip(file_list, fps=fps)
vid_file = '/tmp/tensorrec/movielens/movielens.mp4'
clip.write_videofile(
    filename=vid_file,
    fps=fps,
    codec='mpeg4',
    preset='veryslow',
    ffmpeg_params=['-qscale:v', '10'],
)
    ax.scatter(*zip(*movie_positions[movies_to_plot]), s=2)
    ax.set_aspect('equal')

    for i, movie in enumerate(movies_to_plot):
        movie_name = item_titles[movie]
        movie_position = movie_positions[movie]
        # Comment this line to remove movie titles to the plot.
        ax.annotate(movie_name, movie_position[0:2], fontsize='x-small')

    file = '/tmp/tensorrec/movielens/epoch_{}.jpg'.format(epoch)
    plt.savefig(file)

    logging.info("Finished epoch {}".format(epoch))

# Predict ranks once and reuse them for both metrics.
ranks = model.predict_rank(
    user_features=user_features,
    item_features=item_features,
)
p_at_k = precision_at_k(ranks, test_interactions, k=5)
r_at_k = recall_at_k(ranks, test_interactions, k=30)

logging.info(
    "Precision@5: {}, Recall@30: {}".format(np.mean(p_at_k), np.mean(r_at_k))
)

# Build an MP4 of the model fitting from the saved JPG frames, then delete the frames.
fps = 12
# Frames are ordered by the integer found between the first '_' and '.jpg' in each path.
file_list = sorted(
    glob.glob('/tmp/tensorrec/movielens/*.jpg'),
    key=lambda path: int(path.split('_')[1].split('.jpg')[0]),
)
clip = mpy.ImageSequenceClip(file_list, fps=fps)
vid_file = '/tmp/tensorrec/movielens/movielens.mp4'
clip.write_videofile(
    filename=vid_file,
    fps=fps,
    codec='mpeg4',
    preset='veryslow',
    ffmpeg_params=['-qscale:v', '10'],
)
for file in file_list:
    os.remove(file)
                                      shape=(n_users, n_items))

    # train collaborative filtering model
    epochs = 500
    alpha = 0.00001
    n_components = 10
    verbose = True
    learning_rate = 0.01
    n_sampled_items = int(n_items*0.01)
    fit_kwargs = {'epochs': epochs, 'alpha': alpha, 'verbose': verbose, 'learning_rate': learning_rate,
                  'n_sampled_items': n_sampled_items}

    cf_model = TensorRec(n_components=10,
                         user_repr_graph=NormalizedLinearRepresentationGraph(),
                         loss_graph=WMRBLossGraph())

    cf_model.fit(user_features=user_features, item_features=item_features,
                 interactions=train_interactions, **fit_kwargs)

    # calculate test ranks excluding training items
    predicted_ranks = cf_model.predict_rank(user_features=test_user_features, item_features=item_features)
    predicted_ranks[train.uid - 1, train.iid - 1] = n_items + 1
    predicted_ranks = predicted_ranks.argsort(axis=1).argsort(axis=1) + 1

    # evaluate precision and recall
    precision_results = precision_at_k(predicted_ranks, test_interactions, k=10)
    recall_results = recall_at_k(predicted_ranks, test_interactions, k=10)

    logging.info("Precision at 10: {}".format(np.mean(precision_results)))
    logging.info("Recall at 10: {}".format(np.mean(recall_results)))