def test_get_item_feature_with_new_items(self):
    """Item embeddings are returned for both trained and additional items.

    A model is fitted on randomly generated ratings, then embeddings are
    requested for the items of an additional dataset plus a few item ids
    below ``n_items``.
    """
    n_users, n_items, n_data = 101, 233, 3007
    encoder_size = 100

    # Sum of two random matrices, the second one scaled by 2.
    adjacency_matrix = (_make_sparse_matrix(n_users, n_items, n_data) +
                        2 * _make_sparse_matrix(n_users, n_items, n_data))
    coo = adjacency_matrix.tocoo()
    base_item_features = [{i: np.array([i]) for i in range(n_items)}]
    graph_dataset = GcmcGraphDataset(
        GcmcDataset(coo.row, coo.col, coo.data,
                    item_features=base_item_features),
        test_size=0.1)

    model = GraphConvolutionalMatrixCompletion(
        graph_dataset=graph_dataset,
        encoder_hidden_size=100,
        encoder_size=encoder_size,
        scope_name='GraphConvolutionalMatrixCompletionGraph',
        batch_size=1024,
        epoch_size=10,
        learning_rate=0.01,
        dropout_rate=0.7,
        normalization_type='symmetric')
    model.fit()

    # Items 240 and 243 are both rated 1 by user 3 and carry the same
    # feature value, which is why their embeddings are compared below.
    new_user_ids = [90, 62, 3, 3]
    new_item_ids = [11, 236, 240, 243]
    new_item_features = {
        item_id: np.array([999])
        for item_id in new_item_ids
    }
    additional_dataset = GcmcDataset(np.array(new_user_ids),
                                     np.array(new_item_ids),
                                     np.array([1, 2, 1, 1]),
                                     item_features=[new_item_features])

    # item_ids to get embeddings
    target_item_ids = new_item_ids + [12, 13, 17, 55]
    item_feature = model.get_item_feature_with_new_items(
        item_ids=target_item_ids, additional_dataset=additional_dataset)

    # The result is a pair: (item ids, embedding matrix).
    self.assertEqual(len(item_feature), 2)
    self.assertEqual(list(item_feature[0]), target_item_ids)
    self.assertEqual(item_feature[1].shape,
                     (len(target_item_ids), encoder_size))

    output_embedding = dict(zip(*item_feature))
    np.testing.assert_almost_equal(output_embedding[240],
                                   output_embedding[243])
def test_run(self):
    """Training runs without error and ends with small test loss and rmse.

    The loss and rmse are parsed from the last report string returned by
    ``fit()``.
    """
    n_users, n_items, n_data = 101, 233, 3007

    adjacency_matrix = (_make_sparse_matrix(n_users, n_items, n_data) +
                        2 * _make_sparse_matrix(n_users, n_items, n_data))
    coo = adjacency_matrix.tocoo()
    graph_dataset = GcmcGraphDataset(
        GcmcDataset(coo.row, coo.col, coo.data), test_size=0.1)

    model = GraphConvolutionalMatrixCompletion(
        graph_dataset=graph_dataset,
        encoder_hidden_size=100,
        encoder_size=100,
        scope_name='GraphConvolutionalMatrixCompletionGraph',
        batch_size=1024,
        epoch_size=10,
        learning_rate=0.01,
        dropout_rate=0.7,
        normalization_type='symmetric')
    reports = model.fit()

    # The last report is a comma separated list of ``key=value`` fields;
    # the final two fields hold the test loss and the test rmse.
    final_fields = reports[-1].split(',')
    test_loss = float(final_fields[-2].split('=')[-1])
    # The trailing character of the rmse field is dropped before parsing
    # (presumably a closing delimiter of the report format - confirm).
    test_rmse = float(final_fields[-1].split('=')[-1][:-1])

    self.assertLess(test_loss, 1.0)
    self.assertLess(test_rmse, 0.7)
def test_with_click_threshold(self):
    """GcmcGraphDataset built with ``min_user_click_count=3``.

    Only user 2 has three ratings; the expected user indices below map it
    to 1 and the other users to 0.
    """
    user_ids = np.array([1, 1, 2, 2, 2, 3])
    item_ids = np.array([1, 2, 1, 2, 3, 1])
    ratings = np.array([1, 0, 1, 0, 1, 0])
    user_features = [{
        1: np.array([10, 11]),
        2: np.array([20, 21]),
        3: np.array([30, 31]),
    }]
    item_features = [{
        1: np.array([10, 11, 12]),
        2: np.array([20, 21, 22]),
        3: np.array([30, 31, 32]),
    }]
    test_size = 0.0

    graph_dataset = GcmcGraphDataset(
        GcmcDataset(user_ids=user_ids,
                    item_ids=item_ids,
                    ratings=ratings,
                    user_features=user_features,
                    item_features=item_features),
        test_size,
        min_user_click_count=3)

    expected_user_indices = [0, 0, 1, 1, 1, 0]
    expected_item_indices = [1, 2, 1, 2, 3, 1]
    np.testing.assert_almost_equal(expected_user_indices,
                                   graph_dataset._user.indices)
    np.testing.assert_almost_equal(expected_item_indices,
                                   graph_dataset._item.indices)

    data = graph_dataset.train_data()
    n_ratings = ratings.shape[0]
    self.assertEqual(item_ids.shape, graph_dataset._item.indices.shape)
    self.assertEqual((n_ratings, 2), data['label'].shape)
    self.assertEqual(ratings.shape, data['rating'].shape)
    self.assertEqual(user_ids.shape, data['user_feature_indices'].shape)
    self.assertEqual(item_ids.shape, data['item_feature_indices'].shape)
def test_item_cold_start(self):
    """Predictions are produced for an item that was not in the training data.

    After fitting, ``predict_with_new_items`` is called with an additional
    dataset that introduces item 236 together with its feature.
    """
    n_users, n_items, n_data = 101, 233, 3007

    adjacency_matrix = (_make_sparse_matrix(n_users, n_items, n_data) +
                        2 * _make_sparse_matrix(n_users, n_items, n_data))
    coo = adjacency_matrix.tocoo()
    base_item_features = [{i: np.array([i]) for i in range(n_items)}]
    graph_dataset = GcmcGraphDataset(
        GcmcDataset(coo.row, coo.col, coo.data,
                    item_features=base_item_features),
        test_size=0.1)

    model = GraphConvolutionalMatrixCompletion(
        graph_dataset=graph_dataset,
        encoder_hidden_size=100,
        encoder_size=100,
        scope_name='GraphConvolutionalMatrixCompletionGraph',
        batch_size=1024,
        epoch_size=10,
        learning_rate=0.01,
        dropout_rate=0.7,
        normalization_type='symmetric')
    model.fit()

    query_user_ids = [90, 62]
    query_item_ids = [11, 236]  # 236 is a new item
    additional_dataset = GcmcDataset(
        np.array(query_user_ids),
        np.array(query_item_ids),
        np.array([1, 2]),
        item_features=[{236: np.array([236])}])

    results = model.predict_with_new_items(
        query_user_ids, query_item_ids,
        additional_dataset=additional_dataset)

    # One non-None prediction per queried (user, item) pair.
    self.assertEqual(2, len(results))
    for position in (0, 1):
        self.assertIsNotNone(results[position])
def test_without_information(self):
    """Training data shapes when no user/item features are supplied."""
    user_ids = np.array([1, 1, 2, 2, 2])
    item_ids = np.array([1, 2, 1, 2, 3])
    ratings = np.array([1, 0, 1, 0, 1])

    rating_data = GcmcDataset(user_ids=user_ids,
                              item_ids=item_ids,
                              ratings=ratings)
    test_size = 0.0
    data = GcmcGraphDataset(rating_data, test_size).train_data()

    # (key in the training data, expected shape), checked in this order.
    expected_shapes = [
        ('user', user_ids.shape),
        ('item', item_ids.shape),
        ('label', (ratings.shape[0], 2)),
        ('rating', ratings.shape),
        ('user_feature_indices', user_ids.shape),
        ('item_feature_indices', item_ids.shape),
    ]
    for key, expected in expected_shapes:
        self.assertEqual(expected, data[key].shape)
def test(self):
    """Check the shape of the item feature matrix held by GcmcGraphDataset.

    One single-valued feature is supplied for every item id in
    ``range(n_items)``. The dataset is expected to expose ``n_items + 1``
    rows with one column: the extra row is for the default index.

    Note: no model is trained here; the earlier comment about loss and rmse
    did not describe this test.
    """
    n_users = 101
    n_items = 233
    n_data = 3007

    am1 = _make_sparse_matrix(n_users, n_items, n_data)
    am2 = 2 * _make_sparse_matrix(n_users, n_items, n_data)
    # Convert to COO once and reuse it for the rows, columns and values.
    coo = (am1 + am2).tocoo()
    user_ids = coo.row
    item_ids = coo.col
    ratings = coo.data

    item_features = [{i: np.array([i]) for i in range(n_items)}]
    rating_data = GcmcDataset(user_ids,
                              item_ids,
                              ratings,
                              item_features=item_features)
    dataset = GcmcGraphDataset(dataset=rating_data, test_size=0.2)

    # n_items + 1 rows because of the default index.
    self.assertEqual((n_items + 1, 1), dataset.item_features[0].shape)
def run(self):
    """Train a GraphConvolutionalMatrixCompletion model and dump the results.

    Loads the 'train_data' frame and the 'user_features' / 'item_features'
    inputs, de-duplicates (user, item) pairs, shuffles, truncates to
    ``max_data_size`` rows, fits the model, then dumps the training report
    under 'report' and the fitted model under 'model'.
    """
    tf.reset_default_graph()

    user_col = self.user_column_name
    item_col = self.item_column_name
    rating_col = self.rating_column_name

    df = self.load_data_frame(
        'train_data', required_columns={user_col, item_col, rating_col})
    user_features = self.load('user_features')
    item_features = self.load('item_features')

    # Keep a single row per (user, item) pair, then take a random sample of
    # at most max_data_size rows.
    df.drop_duplicates(subset=[user_col, item_col], inplace=True)
    df = sklearn.utils.shuffle(df)
    df = df.head(n=int(self.max_data_size))

    dataset = GcmcDataset(user_ids=df[user_col].values,
                          item_ids=df[item_col].values,
                          ratings=df[rating_col].values,
                          user_features=user_features,
                          item_features=item_features)
    graph_dataset = GcmcGraphDataset(
        dataset=dataset,
        test_size=self.test_size,
        min_user_click_count=self.min_user_click_count,
        max_user_click_count=self.max_user_click_count)
    model = GraphConvolutionalMatrixCompletion(graph_dataset=graph_dataset,
                                               **self.model_kwargs)

    # The report starts with the model arguments, followed by the entries
    # returned by fit().
    report_header = [str(self.model_kwargs)]
    fit_reports = model.fit(try_count=self.try_count,
                            decay_speed=self.decay_speed)
    self.task_log['report'] = report_header + fit_reports

    self.dump(self.task_log['report'], 'report')
    self.dump(model, 'model')
def test_get_user_feature_with_new_items(self, dummy_get_user_feature):
    """User features come back with one row per user.

    ``dummy_get_user_feature`` is a stand-in whose return value is set to a
    zero matrix (presumably injected by a patch decorator outside this
    block - confirm). The model is not fitted in this test.
    """
    n_users, n_items, n_data = 10, 20, 3007
    n_user_embed_dimension = 50

    adjacency_matrix = (_make_sparse_matrix(n_users, n_items, n_data) +
                        2 * _make_sparse_matrix(n_users, n_items, n_data))
    coo = adjacency_matrix.tocoo()
    user_ids, item_ids, ratings = coo.row, coo.col, coo.data

    base_item_features = [{i: np.array([i]) for i in range(n_items)}]
    dataset = GcmcDataset(user_ids,
                          item_ids,
                          ratings,
                          item_features=base_item_features)
    graph_dataset = GcmcGraphDataset(dataset, test_size=0.1)

    model = GraphConvolutionalMatrixCompletion(
        graph_dataset=graph_dataset,
        encoder_hidden_size=100,
        encoder_size=100,
        scope_name='GraphConvolutionalMatrixCompletionGraph',
        batch_size=1024,
        epoch_size=10,
        learning_rate=0.01,
        dropout_rate=0.7,
        normalization_type='symmetric')

    stub_rows = len(user_ids) * len(item_ids)
    dummy_get_user_feature.return_value = np.zeros(
        (stub_rows, n_user_embed_dimension))

    user_features = model.get_user_feature_with_new_items(
        item_ids, additional_dataset=dataset, with_user_embedding=False)

    self.assertEqual(len(user_features[0]), n_users)
    self.assertEqual(user_features[1].shape,
                     (n_users, n_user_embed_dimension))
def test_with_information(self):
    """Training data shapes when user and item features are supplied."""
    user_ids = np.array([1, 1, 2, 2, 2])
    item_ids = np.array([1, 2, 1, 2, 3])
    ratings = np.array([1, 0, 1, 0, 1])
    user_features = [{
        1: np.array([10, 11]),
        2: np.array([20, 21]),
    }]
    item_features = [{
        1: np.array([10, 11, 12]),
        2: np.array([20, 21, 22]),
        3: np.array([30, 31, 32]),
    }]
    test_size = 0.0

    dataset = GcmcDataset(user_ids=user_ids,
                          item_ids=item_ids,
                          ratings=ratings,
                          user_features=user_features,
                          item_features=item_features)
    data = GcmcGraphDataset(dataset, test_size).train_data()

    # (key in the training data, expected shape), checked in this order.
    expected_shapes = [
        ('user', user_ids.shape),
        ('item', item_ids.shape),
        ('label', (ratings.shape[0], 2)),
        ('rating', ratings.shape),
        ('user_feature_indices', user_ids.shape),
        ('item_feature_indices', item_ids.shape),
    ]
    for key, expected in expected_shapes:
        self.assertEqual(expected, data[key].shape)
def setUp(self) -> None:
    """Build the fixtures shared by the tests of this case.

    ``self.graph_dataset`` wraps three base ratings; ``self.additional_dataset``
    holds three more ratings on item ids 13-15.
    """
    base_dataset = GcmcDataset(
        user_ids=np.array([0, 1, 2]),
        item_ids=np.array([10, 11, 12]),
        ratings=np.array([100, 101, 102]),
    )
    self.graph_dataset = GcmcGraphDataset(dataset=base_dataset,
                                          test_size=0.1)

    self.additional_dataset = GcmcDataset(
        user_ids=np.array([1, 2, 3]),
        item_ids=np.array([13, 14, 15]),
        ratings=np.array([103, 101, 102]),
    )