Exemplo n.º 1
0
 def test_train_regression_model(self):
     """Smoke test: build a regression-target recommender and run a short
     training loop on randomly generated rating data.

     Exercises SVDPPUserModel + RegressionPredictionModel wired through
     RecommenderBuilder with MAE/RMSE eval metrics, then trains for a few
     steps via ModelTrainer and exports the model.
     """
     user_vocab_size = 15
     item_vocab_size = 20
     embedding_dim = 5
     page_size = 3
     user_model = SVDPPUserModel(item_attrs=[])
     regression_model = RegressionPredictionModel(config={
         'rating': {
             'context': 'item',
             'vocab_size': item_vocab_size,
             'embedding_dim': embedding_dim,
         },
     })
     # Attribute configuration shared by the builder: 'rating' is a numeric
     # item-level signal, 'item'/'user' are categorical embeddings.
     attr2config = {
         'rating': {
             'is_numerical': True,
             'level': 'item',
         },
         'item': {
             'vocab_size': item_vocab_size,
             'embedding_dim': embedding_dim,
             'is_numerical': False,
             'level': 'item',
         },
         'user': {
             'vocab_size': user_vocab_size,
             'embedding_dim': embedding_dim,
             'is_numerical': False,
             'level': 'user',
         },
     }
     model_builder = RecommenderBuilder(
         user_model,
         regression_model,
         page_size=page_size,
         attr2config=attr2config,
         embedding_attrs=['user'],
         target2config={
             'rating': {
                 'weight': 1.0,
                 'MAE': {'context': 'item'},
                 'RMSE': {'context': 'item'},
             },
         },
         eval_metrics='MAE RMSE',
     )
     # Generate 10 random batches of 4 users each. Positions past the
     # sampled sequence length are zero-padded; within the sequence each
     # slot has a 50% chance of carrying a real (item, rating) pair.
     batches = []
     batch_size = 4
     for _ in range(10):
         max_seq_len = random.randint(5, 10)
         batch = {
             key: []
             for key in ('user_idx', 'item_idx', 'rating_val',
                         'sequence_length_val')
         }
         for _row in range(batch_size):
             batch['user_idx'].append(
                 [random.randint(1, user_vocab_size - 1)])
             seq_len = random.randint(2, max_seq_len) * page_size
             batch['sequence_length_val'].append([seq_len])
             items = []
             ratings = []
             for step in range(max_seq_len):
                 for _slot in range(page_size):
                     if step < seq_len / page_size:
                         idx = random.randint(1, item_vocab_size - 1)
                         if random.random() < 0.5:
                             # Observed interaction with a 1-5 star rating.
                             items.append(idx)
                             ratings.append(random.randint(1, 5) * 1.0)
                         else:
                             # Slot shown but not rated.
                             items.append(0)
                             ratings.append(0.0)
                     else:
                         # Padding beyond the sequence length.
                         ratings.append(0.0)
                         items.append(0)
             batch['item_idx'].append(items)
             batch['rating_val'].append(ratings)
         batches.append(batch)
     train_data = JsonListDataSet(batches)
     run_name = 'test_regression_l2_model'
     export_dir = self._export_dir + run_name
     # Clear any stale export from a previous run before training.
     shutil.rmtree(export_dir, ignore_errors=True)
     trainer = ModelTrainer(train_data,
                            builder=model_builder,
                            max_steps=10,
                            tensorboard_dir=self._test_path,
                            export_dir=export_dir)
     trainer.train(run_name)
Exemplo n.º 2
0
 def test_train_sequence_sampled_bpr_model(self):
     """Smoke test: train an RNN sequence user model with a sampled BPR
     ranking loss on randomly generated item sequences.

     Exercises SequenceUserModel + SampledBPRModel wired through
     RecommenderBuilder, then trains for a few steps via ModelTrainer and
     exports the model.
     """
     embedding_dim = 10
     user_vocab_size = 15
     item_vocab_size = 20
     rnn_size = 5
     page_size = 3
     user_model = SequenceUserModel(rnn_size)
     bpr_model = SampledBPRModel(config={
         'item': {
             'vocab_size': item_vocab_size,
             'embedding_dim': rnn_size,
         },
     })
     model_builder = RecommenderBuilder(
         user_model,
         bpr_model,
         page_size=page_size,
         attr2config={
             'item': {
                 'vocab_size': item_vocab_size,
                 'embedding_dim': embedding_dim,
                 'is_numerical': False,
                 'level': 'item',
             },
             'user': {
                 'vocab_size': user_vocab_size,
                 'embedding_dim': embedding_dim,
                 'is_numerical': False,
                 'level': 'user',
             },
         },
         target2config={'item': {'weight': 1.0}},
     )
     # Generate 10 random batches of 4 users each. Positions past the
     # sampled sequence length are zero-padded; in-sequence slots carry a
     # random non-zero item index.
     batches = []
     batch_size = 4
     for _ in range(10):
         max_seq_len = random.randint(5, 10)
         batch = {
             key: []
             for key in ('user_idx', 'item_idx', 'sequence_length_val')
         }
         for _row in range(batch_size):
             batch['user_idx'].append(
                 [random.randint(1, user_vocab_size - 1)])
             seq_len = random.randint(2, max_seq_len) * page_size
             batch['sequence_length_val'].append([seq_len])
             items = []
             for step in range(max_seq_len):
                 for _slot in range(page_size):
                     if step < seq_len / page_size:
                         items.append(
                             random.randint(1, item_vocab_size - 1))
                     else:
                         # Padding beyond the sequence length.
                         items.append(0)
             batch['item_idx'].append(items)
         batches.append(batch)
     train_data = JsonListDataSet(batches)
     run_name = 'test_sequence_sampled_bpr_model'
     export_dir = self._export_dir + run_name
     # Clear any stale export from a previous run before training.
     shutil.rmtree(export_dir, ignore_errors=True)
     trainer = ModelTrainer(train_data,
                            builder=model_builder,
                            max_steps=10,
                            tensorboard_dir=self._test_path,
                            export_dir=export_dir)
     trainer.train(run_name)
Exemplo n.º 3
0
 def test_train_ctr_model(self):
     """Smoke test: train a click-through-rate (sigmoid) model where every
     displayed item has a 50% chance of being clicked.

     Exercises SVDPPUserModel + CTRPredictionModel (with a per-user bias)
     wired through RecommenderBuilder, then trains for a few steps via
     ModelTrainer and exports the model.
     """
     user_vocab_size = 15
     item_vocab_size = 20
     embedding_dim = 5
     page_size = 3
     user_model = SVDPPUserModel(item_attrs=[])
     sigmoid_model = CTRPredictionModel(
         'display',
         config={
             'item': {
                 'vocab_size': item_vocab_size,
                 'embedding_dim': embedding_dim,
                 'user_bias': 'user',
             },
             'user': {
                 'vocab_size': user_vocab_size,
             },
         })
     model_builder = RecommenderBuilder(
         user_model,
         sigmoid_model,
         page_size=page_size,
         attr2config={
             'display': {
                 'vocab_size': item_vocab_size,
                 'embedding_dim': embedding_dim,
                 'is_numerical': False,
                 'level': 'item',
             },
             'item': {
                 'vocab_size': item_vocab_size,
                 'embedding_dim': embedding_dim,
                 'is_numerical': False,
                 'level': 'item',
             },
             'user': {
                 'vocab_size': user_vocab_size,
                 'embedding_dim': embedding_dim,
                 'is_numerical': False,
                 'level': 'user',
             },
         },
         embedding_attrs=['user'],
         target2config={'item': {'weight': 1.0}},
     )
     # Generate 10 random batches of 4 users each. Every in-sequence slot
     # is a displayed item; clicked slots copy the display index into
     # item_idx, unclicked slots record 0. Padding is all zeros.
     batches = []
     batch_size = 4
     for _ in range(10):
         max_seq_len = random.randint(5, 10)
         batch = {
             key: []
             for key in ('user_idx', 'item_idx', 'display_idx',
                         'sequence_length_val')
         }
         for _row in range(batch_size):
             batch['user_idx'].append(
                 [random.randint(1, user_vocab_size - 1)])
             seq_len = random.randint(2, max_seq_len) * page_size
             batch['sequence_length_val'].append([seq_len])
             clicks = []
             displays = []
             for step in range(max_seq_len):
                 for _slot in range(page_size):
                     if step < seq_len / page_size:
                         idx = random.randint(1, item_vocab_size - 1)
                         displays.append(idx)
                         # 50% chance the displayed item was clicked.
                         if random.random() < 0.5:
                             clicks.append(idx)
                         else:
                             clicks.append(0)
                     else:
                         # Padding beyond the sequence length.
                         displays.append(0)
                         clicks.append(0)
             batch['item_idx'].append(clicks)
             batch['display_idx'].append(displays)
         batches.append(batch)
     train_data = JsonListDataSet(batches)
     run_name = 'test_ctr_sigmoid_model'
     export_dir = self._export_dir + run_name
     # Clear any stale export from a previous run before training.
     shutil.rmtree(export_dir, ignore_errors=True)
     trainer = ModelTrainer(train_data,
                            builder=model_builder,
                            max_steps=10,
                            tensorboard_dir=self._test_path,
                            export_dir=export_dir)
     trainer.train(run_name)