def test_get_item_feature_with_new_items(self):
        """Fit a model, then request item embeddings that include new item ids.

        The additional dataset introduces item ids 236, 240 and 243, none of
        which is a key of the training-time item feature map (its keys are
        ``range(n_items)``). Items 240 and 243 are described identically in
        the additional dataset (same user, same rating, same feature vector),
        so their embeddings are expected to be almost equal.
        """
        n_users, n_items, n_data = 101, 233, 3007
        # The unscaled matrix is generated first, then the doubled one.
        adjacency_matrix = (
            _make_sparse_matrix(n_users, n_items, n_data)
            + 2 * _make_sparse_matrix(n_users, n_items, n_data))
        interactions = adjacency_matrix.tocoo()
        training_item_features = [{i: np.array([i]) for i in range(n_items)}]
        dataset = GcmcDataset(interactions.row,
                              interactions.col,
                              interactions.data,
                              item_features=training_item_features)
        graph_dataset = GcmcGraphDataset(dataset, test_size=0.1)

        encoder_size = 100
        hyper_params = dict(
            encoder_hidden_size=100,
            encoder_size=encoder_size,
            scope_name='GraphConvolutionalMatrixCompletionGraph',
            batch_size=1024,
            epoch_size=10,
            learning_rate=0.01,
            dropout_rate=0.7,
            normalization_type='symmetric',
        )
        model = GraphConvolutionalMatrixCompletion(graph_dataset=graph_dataset,
                                                   **hyper_params)
        model.fit()

        extra_user_ids = [90, 62, 3, 3]
        extra_item_ids = [11, 236, 240, 243]
        # Every item in the additional dataset gets its own [999] feature array.
        extra_item_features = {
            item_id: np.array([999])
            for item_id in extra_item_ids
        }
        additional_dataset = GcmcDataset(
            np.array(extra_user_ids),
            np.array(extra_item_ids),
            np.array([1, 2, 1, 1]),
            item_features=[extra_item_features])

        # Item ids whose embeddings are requested: the additional ones plus
        # a few ids that are not part of the additional dataset.
        target_item_ids = extra_item_ids + [12, 13, 17, 55]

        item_feature = model.get_item_feature_with_new_items(
            item_ids=target_item_ids, additional_dataset=additional_dataset)
        self.assertEqual(len(item_feature), 2)
        returned_ids, embeddings = item_feature
        self.assertEqual(list(returned_ids), target_item_ids)
        self.assertEqual(embeddings.shape,
                         (len(target_item_ids), encoder_size))
        output_embedding = dict(zip(returned_ids, embeddings))
        np.testing.assert_almost_equal(output_embedding[240],
                                       output_embedding[243])
 def test_run(self):
     """Train on generated ratings and bound the final reported metrics.

     The last entry returned by ``fit()`` is parsed as a comma-separated
     string: the second-to-last field carries the test loss and the last
     field the test rmse, each after an ``=`` sign. The final character of
     the last field is dropped before conversion.
     """
     n_users, n_items, n_data = 101, 233, 3007
     # The unscaled matrix is generated first, then the doubled one.
     adjacency_matrix = (
         _make_sparse_matrix(n_users, n_items, n_data)
         + 2 * _make_sparse_matrix(n_users, n_items, n_data))
     interactions = adjacency_matrix.tocoo()
     dataset = GcmcDataset(interactions.row, interactions.col,
                           interactions.data)
     graph_dataset = GcmcGraphDataset(dataset, test_size=0.1)

     hyper_params = dict(
         encoder_hidden_size=100,
         encoder_size=100,
         scope_name='GraphConvolutionalMatrixCompletionGraph',
         batch_size=1024,
         epoch_size=10,
         learning_rate=0.01,
         dropout_rate=0.7,
         normalization_type='symmetric',
     )
     model = GraphConvolutionalMatrixCompletion(graph_dataset=graph_dataset,
                                                **hyper_params)
     reports = model.fit()

     final_fields = reports[-1].split(',')
     loss_text = final_fields[-2].rpartition('=')[-1]
     # Strip the trailing character of the report line before parsing.
     rmse_text = final_fields[-1].rpartition('=')[-1][:-1]
     test_loss = float(loss_text)
     test_rmse = float(rmse_text)
     self.assertLess(test_loss, 1.0)
     self.assertLess(test_rmse, 0.7)
Пример #3
0
 def test_with_click_threshold(self):
     """Check index assignment when ``min_user_click_count=3`` is set.

     Only user 2 appears three times in ``user_ids``; the expected user
     indices map users 1 and 3 to index 0 and user 2 to index 1, while the
     expected item indices equal the raw item ids.
     """
     user_ids = np.array([1, 1, 2, 2, 2, 3])
     item_ids = np.array([1, 2, 1, 2, 3, 1])
     ratings = np.array([1, 0, 1, 0, 1, 0])
     test_size = 0.0
     user_feature_map = {
         1: np.array([10, 11]),
         2: np.array([20, 21]),
         3: np.array([30, 31]),
     }
     item_feature_map = {
         1: np.array([10, 11, 12]),
         2: np.array([20, 21, 22]),
         3: np.array([30, 31, 32]),
     }
     dataset = GcmcDataset(user_ids=user_ids,
                           item_ids=item_ids,
                           ratings=ratings,
                           user_features=[user_feature_map],
                           item_features=[item_feature_map])
     graph_dataset = GcmcGraphDataset(dataset,
                                      test_size,
                                      min_user_click_count=3)

     expected_user_indices = [0, 0, 1, 1, 1, 0]
     expected_item_indices = [1, 2, 1, 2, 3, 1]
     np.testing.assert_almost_equal(expected_user_indices,
                                    graph_dataset._user.indices)
     np.testing.assert_almost_equal(expected_item_indices,
                                    graph_dataset._item.indices)

     data = graph_dataset.train_data()
     self.assertEqual(item_ids.shape, graph_dataset._item.indices.shape)
     # Expected shape of each training-data entry, checked in this order.
     expected_shapes = (
         ('label', (ratings.shape[0], 2)),
         ('rating', ratings.shape),
         ('user_feature_indices', user_ids.shape),
         ('item_feature_indices', item_ids.shape),
     )
     for key, shape in expected_shapes:
         self.assertEqual(shape, data[key].shape)
    def test_item_cold_start(self):
        """Predict for a user/item pair whose item was absent during training.

        Item 236 is not a key of the training-time item feature map (its
        keys are ``range(n_items)``); it is supplied, with its feature,
        through an additional dataset. One non-None prediction is expected
        per requested pair.
        """
        n_users, n_items, n_data = 101, 233, 3007
        # The unscaled matrix is generated first, then the doubled one.
        adjacency_matrix = (
            _make_sparse_matrix(n_users, n_items, n_data)
            + 2 * _make_sparse_matrix(n_users, n_items, n_data))
        interactions = adjacency_matrix.tocoo()
        training_item_features = [{i: np.array([i]) for i in range(n_items)}]
        dataset = GcmcDataset(interactions.row,
                              interactions.col,
                              interactions.data,
                              item_features=training_item_features)
        graph_dataset = GcmcGraphDataset(dataset, test_size=0.1)

        hyper_params = dict(
            encoder_hidden_size=100,
            encoder_size=100,
            scope_name='GraphConvolutionalMatrixCompletionGraph',
            batch_size=1024,
            epoch_size=10,
            learning_rate=0.01,
            dropout_rate=0.7,
            normalization_type='symmetric',
        )
        model = GraphConvolutionalMatrixCompletion(graph_dataset=graph_dataset,
                                                   **hyper_params)
        model.fit()

        query_user_ids = [90, 62]
        query_item_ids = [11, 236]  # 236 is the new item
        new_item_features = [{236: np.array([236])}]
        additional_dataset = GcmcDataset(np.array(query_user_ids),
                                         np.array(query_item_ids),
                                         np.array([1, 2]),
                                         item_features=new_item_features)
        results = model.predict_with_new_items(
            query_user_ids,
            query_item_ids,
            additional_dataset=additional_dataset)

        self.assertEqual(2, len(results))
        for position in (0, 1):
            self.assertIsNotNone(results[position])
Пример #5
0
 def test_without_information(self):
     """Check training-data shapes when no user/item features are given."""
     user_ids = np.array([1, 1, 2, 2, 2])
     item_ids = np.array([1, 2, 1, 2, 3])
     ratings = np.array([1, 0, 1, 0, 1])
     rating_data = GcmcDataset(user_ids=user_ids,
                               item_ids=item_ids,
                               ratings=ratings)
     test_size = 0.0
     data = GcmcGraphDataset(rating_data, test_size).train_data()

     # Expected shape of each training-data entry, checked in this order.
     expected_shapes = (
         ('user', user_ids.shape),
         ('item', item_ids.shape),
         ('label', (ratings.shape[0], 2)),
         ('rating', ratings.shape),
         ('user_feature_indices', user_ids.shape),
         ('item_feature_indices', item_ids.shape),
     )
     for key, shape in expected_shapes:
         self.assertEqual(shape, data[key].shape)
Пример #6
0
 def test(self):
     """Check the shape of the item feature matrix held by the graph dataset.

     One single-element feature vector is supplied for every item id in
     ``range(n_items)``; the first item feature matrix of the graph dataset
     is expected to have ``n_items + 1`` rows and one column.
     """
     n_users, n_items, n_data = 101, 233, 3007
     # The unscaled matrix is generated first, then the doubled one.
     adjacency_matrix = (
         _make_sparse_matrix(n_users, n_items, n_data)
         + 2 * _make_sparse_matrix(n_users, n_items, n_data))
     interactions = adjacency_matrix.tocoo()
     per_item_features = {i: np.array([i]) for i in range(n_items)}
     rating_data = GcmcDataset(interactions.row,
                               interactions.col,
                               interactions.data,
                               item_features=[per_item_features])
     graph_dataset = GcmcGraphDataset(dataset=rating_data, test_size=0.2)

     # One row more than n_items because of the default index.
     expected_shape = (n_items + 1, 1)
     self.assertEqual(expected_shape, graph_dataset.item_features[0].shape)
Пример #7
0
    def run(self):
        """Train a GCMC model on the loaded ratings and dump report and model.

        Steps: reset the default TensorFlow graph, load the rating frame and
        the user/item features, de-duplicate (user, item) pairs, shuffle and
        truncate to ``max_data_size`` rows, build the graph dataset, fit the
        model, then dump the report and the fitted model.
        """
        tf.reset_default_graph()
        required_columns = {
            self.user_column_name,
            self.item_column_name,
            self.rating_column_name,
        }
        df = self.load_data_frame('train_data',
                                  required_columns=required_columns)
        user_features = self.load('user_features')
        item_features = self.load('item_features')

        # Keep one row per (user, item) pair; done in place on the loaded frame.
        pair_columns = [self.user_column_name, self.item_column_name]
        df.drop_duplicates(subset=pair_columns, inplace=True)
        # Shuffle before truncating so the kept rows are not the leading ones.
        df = sklearn.utils.shuffle(df).head(n=int(self.max_data_size))

        dataset = GcmcDataset(
            user_ids=df[self.user_column_name].values,
            item_ids=df[self.item_column_name].values,
            ratings=df[self.rating_column_name].values,
            user_features=user_features,
            item_features=item_features)
        graph_dataset = GcmcGraphDataset(
            dataset=dataset,
            test_size=self.test_size,
            min_user_click_count=self.min_user_click_count,
            max_user_click_count=self.max_user_click_count)
        model = GraphConvolutionalMatrixCompletion(graph_dataset=graph_dataset,
                                                   **self.model_kwargs)

        # The report starts with the stringified model kwargs, followed by
        # whatever entries fit() returns.
        report_header = [str(self.model_kwargs)]
        fit_reports = model.fit(try_count=self.try_count,
                                decay_speed=self.decay_speed)
        self.task_log['report'] = report_header + fit_reports
        self.dump(self.task_log['report'], 'report')
        self.dump(model, 'model')
 def test_get_user_feature_with_new_items(self, dummy_get_user_feature):
     """Check the shape of user features returned with a stubbed backend.

     ``dummy_get_user_feature`` has its ``return_value`` set here, so it is
     presumably a mock injected by a patch decorator outside this view
     (TODO confirm). The model is constructed but never fitted.
     """
     n_users, n_items, n_data = 10, 20, 3007
     # The unscaled matrix is generated first, then the doubled one.
     adjacency_matrix = (
         _make_sparse_matrix(n_users, n_items, n_data)
         + 2 * _make_sparse_matrix(n_users, n_items, n_data))
     interactions = adjacency_matrix.tocoo()
     observed_user_ids = interactions.row
     observed_item_ids = interactions.col
     per_item_features = {i: np.array([i]) for i in range(n_items)}
     dataset = GcmcDataset(observed_user_ids,
                           observed_item_ids,
                           interactions.data,
                           item_features=[per_item_features])
     graph_dataset = GcmcGraphDataset(dataset, test_size=0.1)

     hyper_params = dict(
         encoder_hidden_size=100,
         encoder_size=100,
         scope_name='GraphConvolutionalMatrixCompletionGraph',
         batch_size=1024,
         epoch_size=10,
         learning_rate=0.01,
         dropout_rate=0.7,
         normalization_type='symmetric',
     )
     model = GraphConvolutionalMatrixCompletion(graph_dataset=graph_dataset,
                                                **hyper_params)

     n_user_embed_dimension = 50
     # Stubbed features: all zeros, one row per (user entry, item entry) pair.
     stub_rows = len(observed_user_ids) * len(observed_item_ids)
     dummy_get_user_feature.return_value = np.zeros(
         (stub_rows, n_user_embed_dimension))

     user_features = model.get_user_feature_with_new_items(
         observed_item_ids,
         additional_dataset=dataset,
         with_user_embedding=False)
     self.assertEqual(len(user_features[0]), n_users)
     self.assertEqual(user_features[1].shape,
                      (n_users, n_user_embed_dimension))
Пример #9
0
 def test_with_information(self):
     """Check training-data shapes when user and item features are given."""
     user_ids = np.array([1, 1, 2, 2, 2])
     item_ids = np.array([1, 2, 1, 2, 3])
     ratings = np.array([1, 0, 1, 0, 1])
     test_size = 0.0
     user_feature_map = {1: np.array([10, 11]), 2: np.array([20, 21])}
     item_feature_map = {
         1: np.array([10, 11, 12]),
         2: np.array([20, 21, 22]),
         3: np.array([30, 31, 32]),
     }
     dataset = GcmcDataset(user_ids=user_ids,
                           item_ids=item_ids,
                           ratings=ratings,
                           user_features=[user_feature_map],
                           item_features=[item_feature_map])
     data = GcmcGraphDataset(dataset, test_size).train_data()

     # Expected shape of each training-data entry, checked in this order.
     expected_shapes = (
         ('user', user_ids.shape),
         ('item', item_ids.shape),
         ('label', (ratings.shape[0], 2)),
         ('rating', ratings.shape),
         ('user_feature_indices', user_ids.shape),
         ('item_feature_indices', item_ids.shape),
     )
     for key, shape in expected_shapes:
         self.assertEqual(shape, data[key].shape)
Пример #10
0
 def setUp(self) -> None:
     """Create the graph dataset and the additional dataset used as fixtures."""
     base_dataset = GcmcDataset(
         user_ids=np.array([0, 1, 2]),
         item_ids=np.array([10, 11, 12]),
         ratings=np.array([100, 101, 102]))
     self.graph_dataset = GcmcGraphDataset(dataset=base_dataset,
                                           test_size=0.1)
     # User 3 and items 13-15 do not occur in the base dataset above.
     self.additional_dataset = GcmcDataset(
         user_ids=np.array([1, 2, 3]),
         item_ids=np.array([13, 14, 15]),
         ratings=np.array([103, 101, 102]))