Пример #1
0
Файл: run.py Проект: mnskim/saqa
 def build_dev_iterator():
     """Build the dev-set DataIterator used for inference during training.

     When BERT features are enabled, the precomputed context/question
     embedding buckets are loaded and passed along; otherwise a plain
     iterator over `dev_buckets` is returned. No shuffling (5th positional
     argument is False) since this is an evaluation iterator.
     """
     extra_kwargs = {"bert": False}
     if config.bert:
         context_buckets = get_buckets_bert(
             os.path.join(config.bert_dir, config.dev_bert_emb_context))
         ques_buckets = get_buckets_bert(
             os.path.join(config.bert_dir, config.dev_bert_emb_ques))
         extra_kwargs = {
             "bert": True,
             "bert_buckets": list(zip(context_buckets, ques_buckets)),
         }
     return DataIterator(dev_buckets,
                         config.batch_size,
                         config.para_limit,
                         config.ques_limit,
                         config.char_limit,
                         False,
                         config.sent_limit,
                         **extra_kwargs)
Пример #2
0
    def train_model(self):
        """Adversarial training loop: alternate discriminator and generator updates.

        Each outer epoch performs `step_D` discriminator passes followed by
        `step_G` generator passes over all users; evaluation is logged every
        `verbose` epochs.
        """
        self.logger.info(self.evaluator.metrics_info())
        all_users = np.arange(self.num_users)
        gen_iter = DataIterator(all_users, batch_size=self.batchSize_G, shuffle=True, drop_last=False)
        disc_iter = DataIterator(all_users, batch_size=self.batchSize_D, shuffle=True, drop_last=False)

        # One outer epoch contains `step_G` generator updates, so scale the total down.
        for epoch in range(int(self.epochs / self.step_G)):
            train_matrix, zr_matrix, pm_matrix = self.get_train_data()

            # Discriminator updates.
            for _ in range(self.step_D):
                for users in disc_iter:
                    real = train_matrix[users].toarray()
                    mask = pm_matrix[users].toarray()
                    self.sess.run(self.trainer_d,
                                  feed_dict={self.real_data: real,
                                             self.mask: mask,
                                             self.condition: real})

            # Generator updates.
            for _ in range(self.step_G):
                for users in gen_iter:
                    real = train_matrix[users].toarray()
                    zr_mask = zr_matrix[users].toarray()
                    p_mask = pm_matrix[users].toarray()
                    self.sess.run(self.trainer_g,
                                  feed_dict={self.real_data: real,
                                             self.condition: real,
                                             self.mask: p_mask,
                                             self.g_zr_dims: zr_mask})
            if epoch % self.verbose == 0:
                self.logger.info("epoch %d:\t%s" % (epoch, self.evaluate()))
Пример #3
0
Файл: run.py Проект: mnskim/saqa
 def build_train_iterator():
     """Build the training DataIterator (shuffled, with new spans).

     When BERT features are enabled the precomputed context/question
     embedding buckets are attached via `bert_buckets`.
     """
     extra_kwargs = {"bert": False}
     if config.bert:
         context_buckets = get_buckets_bert(
             os.path.join(config.bert_dir, config.train_bert_emb_context))
         ques_buckets = get_buckets_bert(
             os.path.join(config.bert_dir, config.train_bert_emb_ques))
         extra_kwargs = {
             "bert": True,
             "bert_buckets": list(zip(context_buckets, ques_buckets)),
         }
     return DataIterator(train_buckets,
                         config.batch_size,
                         config.para_limit,
                         config.ques_limit,
                         config.char_limit,
                         True,
                         config.sent_limit,
                         new_spans=True,
                         **extra_kwargs)
Пример #4
0
    def __init__(self,
                 dataset,
                 neg_num=1,
                 batch_size=1,
                 shuffle=False,
                 drop_last=False):
        """Initializes a new `PairwiseSampler` instance.

        Args:
            dataset (data.Dataset): An instance of `Dataset`.
            neg_num (int): How many negative items for each positive item.
                Defaults to `1`.
            batch_size (int): How many samples per batch to load.
                Defaults to `1`.
            shuffle (bool): Whether reshuffling the samples at every epoch.
                Defaults to `False`.
            drop_last (bool): Whether dropping the last incomplete batch.
                Defaults to `False`.
        """
        super(PairwiseSampler, self).__init__(batch_size=batch_size,
                                              drop_last=drop_last)
        self.shuffle = shuffle
        self.neg_num = neg_num
        # `item_num` is the correct spelling (matching the name read by the
        # sampling helpers, e.g. `_sampling_negative_items`); the misspelled
        # `item_unm` is kept as an alias so any existing reader keeps working.
        self.item_num = dataset.num_items
        self.item_unm = self.item_num
        user_pos_dict = dataset.get_user_train_dict()
        # Sets give O(1) membership tests when excluding positives later.
        self.user_pos_dict = {
            user: set(items)
            for user, items in user_pos_dict.items()
        }
        user_list, item_list = dataset.get_train_interactions()
        self.ui_interactions = DataIterator(user_list,
                                            item_list,
                                            batch_size=1,
                                            shuffle=self.shuffle,
                                            drop_last=False)
Пример #5
0
    def train_model(self):
        """Pointwise training loop over FISM-like samples.

        Each epoch resamples negatives, pads user histories, accumulates the
        per-batch loss, and logs averaged loss/time; evaluation runs every
        `verbose` epochs.
        """
        self.logger.info(self.evaluator.metrics_info())
        for epoch in range(1, self.num_epochs + 1):
            users, num_idx, items, labels = \
                data_generator._get_pointwise_all_likefism_data(
                    self.dataset, self.num_negatives, self.train_dict)
            batches = DataIterator(users, num_idx, items, labels,
                                   batch_size=self.batch_size, shuffle=True)

            n_instances = len(users)
            epoch_loss = 0.0
            start = time()
            for bat_users, bat_idx, bat_items, bat_labels in batches:
                # Histories vary in length; pad with the "no item" id.
                padded_users = pad_sequences(bat_users, value=self.num_items)
                loss, _ = self.sess.run(
                    (self.loss, self.optimizer),
                    feed_dict={self.user_input: padded_users,
                               self.num_idx: bat_idx,
                               self.item_input: bat_items,
                               self.labels: bat_labels,
                               self.is_train_phase: True})
                epoch_loss += loss
            self.logger.info("[iter %d : loss : %f, time: %f]" %
                             (epoch, epoch_loss / n_instances, time() - start))
            if epoch % self.verbose == 0:
                self.logger.info("epoch %d:\t%s" % (epoch, self.evaluate()))
Пример #6
0
def _sampling_negative_items(user_pos_len, neg_num, item_num, user_pos_dict):
    """Sample `neg_num` negative items per positive interaction of each user.

    Args:
        user_pos_len: Iterable of (user, num_positive_items) pairs.
        neg_num (int): Negatives to draw per positive item; must be positive.
        item_num (int): Total number of items to sample from.
        user_pos_dict (dict): user -> positive items, excluded from sampling.

    Returns:
        list: Per-user negative items; rows are arrays of shape
        [n_pos, neg_num] when neg_num > 1, otherwise scalars/1-D entries.

    Raises:
        ValueError: If `neg_num` is not positive.
    """
    if neg_num <= 0:
        raise ValueError("'neg_num' must be a positive integer.")

    users, n_pos = list(zip(*user_pos_len))
    users_n_pos = DataIterator(users,
                               n_pos,
                               batch_size=1024,
                               shuffle=False,
                               drop_last=False)
    neg_items_list = []
    for bat_user, batch_num in users_n_pos:
        # Each user needs neg_num negatives for every positive item.
        batch_num = [num * neg_num for num in batch_num]
        exclusion = [user_pos_dict[u] for u in bat_user]
        bat_neg_items = batch_randint_choice(item_num,
                                             batch_num,
                                             replace=True,
                                             exclusion=exclusion)

        # Iterate the sampled rows directly; the original zipped in `user`
        # and `n_item` but never used them.
        for neg_items in bat_neg_items:
            if isinstance(neg_items, Iterable):
                if neg_num > 1:
                    # Group the flat draw into neg_num columns per positive.
                    neg_items = np.reshape(neg_items, newshape=[-1, neg_num])
                neg_items_list.extend(neg_items)
            else:
                neg_items_list.append(neg_items)
    return neg_items_list
Пример #7
0
    def train_model(self):
        """Sequence-recommender training: fixed (user, seq, pos) instances with
        negatives resampled every epoch; evaluates after each epoch."""
        self.logger.info(self.evaluator.metrics_info())
        self.user_pos_train = csr_to_user_dict_bytime(
            self.dataset.time_matrix, self.dataset.train_matrix)
        users_list, item_seq_list, item_pos_list = self._generate_sequences()
        for epoch in range(self.epochs):
            item_neg_list = self._sample_negative(users_list)
            batches = DataIterator(users_list,
                                   item_seq_list,
                                   item_pos_list,
                                   item_neg_list,
                                   batch_size=self.batch_size,
                                   shuffle=True)
            for bat_user, bat_seq, bat_pos, bat_neg in batches:
                self.sess.run(self.train_opt,
                              feed_dict={self.user_ph: bat_user,
                                         self.item_seq_ph: bat_seq,
                                         self.item_pos_ph: bat_pos,
                                         self.item_neg_ph: bat_neg,
                                         self.is_training: True})

            self.logger.info("epoch %d:\t%s" % (epoch, self.evaluate_model()))
Пример #8
0
    def train_model(self):
        """Sequence-only training loop (no user ids in the feed); regenerates
        training triples and evaluates once per epoch."""
        self.logger.info(self.evaluator.metrics_info())

        for epoch in range(self.epochs):
            item_seq_list, item_pos_list, item_neg_list = self.get_train_data()
            batches = DataIterator(item_seq_list,
                                   item_pos_list,
                                   item_neg_list,
                                   batch_size=self.batch_size,
                                   shuffle=True)
            for bat_seq, bat_pos, bat_neg in batches:
                self.sess.run(self.train_opt,
                              feed_dict={self.item_seq_ph: bat_seq,
                                         self.item_pos_ph: bat_pos,
                                         self.item_neg_ph: bat_neg,
                                         self.is_training: True})

            self.logger.info("epoch %d:\t%s" % (epoch, self.evaluate_model()))
Пример #9
0
    def train_model(self):
        """Pairwise training with (positive, social, negative) item triples and
        per-pair social weights; logs averaged loss/time each epoch."""
        self.logger.info(self.evaluator.metrics_info())
        for epoch in range(self.num_epochs):
            (users, pos_items, social_items,
             neg_items, suk) = self._get_pairwise_all_data()
            batches = DataIterator(users, pos_items, social_items,
                                   neg_items, suk,
                                   batch_size=self.batch_size,
                                   shuffle=True)
            epoch_loss = 0.0
            start = time()
            for bat_users, bat_pos, bat_social, bat_neg, bat_suk in batches:
                loss, _ = self.sess.run(
                    (self.loss, self.optimizer),
                    feed_dict={self.user_input: bat_users,
                               self.item_input_pos: bat_pos,
                               self.item_input_social: bat_social,
                               self.item_input_neg: bat_neg,
                               self.suk: bat_suk})
                epoch_loss += loss
            self.logger.info("[iter %d : loss : %f, time: %f]" %
                             (epoch, epoch_loss / len(users), time() - start))
            if epoch % self.verbose == 0:
                self.logger.info("epoch %d:\t%s" % (epoch, self.evaluate()))
Пример #10
0
    def train_model(self):
        """Train a denoising autoencoder over user rating rows.

        A Bernoulli(1 - corruption_level) keep-mask is drawn once per epoch
        over the whole user-item matrix; users are processed in shuffled
        batches of dense 0/1 interaction rows.
        """
        self.logger.info(self.evaluator.metrics_info())
        for epoch in range(self.num_epochs):
            mask_np = np.random.binomial(
                1, 1 - self.corruption_level, (self.num_users, self.num_items))

            epoch_loss = 0.0
            users_iter = DataIterator(np.arange(self.num_users),
                                      batch_size=self.batch_size,
                                      shuffle=True,
                                      drop_last=False)
            start = time()
            for batch_users in users_iter:
                # Densify the sampled users' interactions as a 0/1 matrix.
                batch_matrix = np.zeros((len(batch_users), self.num_items))
                for row, user_id in enumerate(batch_users):
                    batch_matrix[row, self.train_dict[user_id]] = 1

                _, loss = self.sess.run(
                    [self.optimizer, self.loss],
                    feed_dict={self.mask_corruption: mask_np[batch_users, :],
                               self.input_R: batch_matrix})
                epoch_loss += loss
            self.logger.info("[iter %d : loss : %f, time: %f]" %
                             (epoch, epoch_loss / self.num_users,
                              time() - start))
            if epoch % self.verbose == 0:
                self.logger.info("epoch %d:\t%s" % (epoch, self.evaluate()))
Пример #11
0
 def _shuffle_index(self, seq_index):
     """Yield shuffled batches of indexes while keeping chunk-level locality.

     The index sequence is first cut into large chunks (32 batches each),
     the chunk order is shuffled, and batches are shuffled within each
     chunk; the last partial batch of a chunk is dropped.
     """
     chunks = list(DataIterator(seq_index,
                                batch_size=self.batch_size * 32,
                                shuffle=False,
                                drop_last=False))
     # Shuffle the order of the chunks themselves (batch_size=1 wraps each
     # chunk in a singleton batch).
     for chunk in DataIterator(chunks, batch_size=1, shuffle=True,
                               drop_last=False):
         yield from DataIterator(chunk[0],
                                 batch_size=self.batch_size,
                                 shuffle=True,
                                 drop_last=True)
Пример #12
0
    def __iter__(self):
        """Yield (users, pos_items) batches according to the sampler settings."""
        yield from DataIterator(self.users_list,
                                self.pos_items_list,
                                batch_size=self.batch_size,
                                shuffle=self.shuffle,
                                drop_last=self.drop_last)
Пример #13
0
 def build_dev_iterator():
     """Create the evaluation iterator over the dev HotpotQA buckets
     (no sampler, so iteration order is deterministic)."""
     return DataIterator(HotpotDataset(dev_buckets),
                         config.para_limit,
                         config.ques_limit,
                         config.char_limit,
                         config.sent_limit,
                         batch_size=config.batch_size,
                         num_workers=2)
Пример #14
0
    def __iter__(self):
        """Yield (users, recent_items, pos_items, neg_items) batches with
        negatives freshly sampled at the start of each epoch."""
        neg_items = _sampling_negative_items(self.user_pos_len, self.neg_num,
                                             self.item_num, self.user_pos_dict)
        yield from DataIterator(self.users_list,
                                self.recent_items_list,
                                self.pos_items_list,
                                neg_items,
                                batch_size=self.batch_size,
                                shuffle=self.shuffle,
                                drop_last=self.drop_last)
Пример #15
0
    def evaluate(self, model, test_users=None):
        """Evaluate `model`.

        Args:
            model: The model need to be evaluated. This model must have
                a method `predict_for_eval(self, users)`, where the argument
                `users` is a list of users and the return is a 2-D array that
                contains `users` rating/ranking scores on all items.
            test_users: Optional subset of users to evaluate. Defaults to all
                users that have test interactions.

        Returns:
            str: A single-line string consist of all results, such as
                `"0.18663847    0.11239596    0.35824192    0.21479650"`.

        Raises:
            TypeError: If `test_users` is not a list, tuple, set or ndarray.
        """
        # B: batch size
        # N: the number of items
        test_users = test_users if test_users is not None else list(self.user_pos_test.keys())
        if not isinstance(test_users, (list, tuple, set, np.ndarray)):
            # Fixed message: the parameter is named `test_users` (was 'test_user').
            raise TypeError("'test_users' must be a list, tuple, set or numpy array!")

        test_users = DataIterator(test_users, batch_size=self.batch_size,
                                  shuffle=False, drop_last=False)
        batch_result = []
        for batch_users in test_users:
            if self.user_neg_test is not None:
                # Rank each user's positives against their sampled negatives;
                # the positives occupy the first len(pos) columns of each row.
                candidate_items = [list(self.user_pos_test[u]) + self.user_neg_test[u] for u in batch_users]
                test_items = [set(range(len(self.user_pos_test[u]))) for u in batch_users]

                ranking_score = model.predict(batch_users, candidate_items)  # (B,N)
                # Rows may differ in length; pad with -inf so padding can
                # never outrank a real candidate.
                ranking_score = pad_sequences(ranking_score, value=-np.inf, dtype=np.float32)

                ranking_score = np.array(ranking_score)
            else:
                test_items = [self.user_pos_test[u] for u in batch_users]
                ranking_score = model.predict(batch_users, None)  # (B,N)
                ranking_score = np.array(ranking_score)

                # set the ranking scores of training items to -inf,
                # then the training items will be sorted at the end of the ranking list.
                for idx, user in enumerate(batch_users):
                    if user in self.user_pos_train and len(self.user_pos_train[user]) > 0:
                        train_items = self.user_pos_train[user]
                        ranking_score[idx][train_items] = -np.inf

            result = self.eval_score_matrix(ranking_score, test_items, self.metrics,
                                            top_k=self.max_top, thread_num=self.num_thread)  # (B,k*metric_num)
            batch_result.append(result)

        # concatenate the batch results to a matrix
        all_user_result = np.concatenate(batch_result, axis=0)  # (num_users, metrics_num*max_top)
        final_result = np.mean(all_user_result, axis=0)  # (1, metrics_num*max_top)

        # Keep only the requested cutoffs (`top_show` holds 1-indexed k values).
        final_result = np.reshape(final_result, newshape=[self.metrics_num, self.max_top])  # (metrics_num, max_top)
        final_result = final_result[:, self.top_show - 1]
        final_result = np.reshape(final_result, newshape=[-1])
        buf = '\t'.join([("%.8f" % x).ljust(12) for x in final_result])
        return buf
Пример #16
0
 def build_train_iterator():
     """Create the training iterator over the train HotpotQA buckets,
     shuffled via a RandomSampler."""
     dataset = HotpotDataset(train_buckets)
     return DataIterator(dataset,
                         config.para_limit,
                         config.ques_limit,
                         config.char_limit,
                         config.sent_limit,
                         batch_size=config.batch_size,
                         sampler=RandomSampler(dataset),
                         num_workers=2)
Пример #17
0
    def __init__(self,
                 dataset,
                 high_order=1,
                 neg_num=1,
                 batch_size=1,
                 shuffle=False,
                 drop_last=False):
        """Initializes a new `TimeOrderPairwiseSampler` instance.

        Args:
            dataset (data.Dataset): An instance of `Dataset`.
            high_order (int): The number of recent items. Defaults to `1`.
            neg_num (int): How many negative items for each positive item.
                Defaults to `1`.
            batch_size (int): How many samples per batch to load.
                Defaults to `1`.
            shuffle (bool): Whether reshuffling the samples at every epoch.
                Defaults to `False`.
            drop_last (bool): Whether dropping the last incomplete batch.
                Defaults to `False`.

        Raises:
            ValueError: If `high_order` is negative.
        """
        super(TimeOrderPairwiseSampler, self).__init__(batch_size=batch_size,
                                                       drop_last=drop_last)
        if high_order < 0:
            # Fixed message: the original said "can be a negative integer!",
            # which contradicts the check.
            raise ValueError("'high_order' must be a non-negative integer!")

        self.shuffle = shuffle
        self.neg_num = neg_num
        # `item_num` is the correct spelling (matching the sampling helpers);
        # the misspelled `item_unm` is kept as a backward-compatible alias.
        self.item_num = dataset.num_items
        self.item_unm = self.item_num
        user_pos_dict = dataset.get_user_train_dict(by_time=True)
        self.user_pos_dict = {
            user: set(items)
            for user, items in user_pos_dict.items()
        }

        # Build (user, recent_items, next_item) instances from each user's
        # time-ordered item sequence.
        user_list, recent_items_list, next_item_list = [], [], []
        for user, seq_items in user_pos_dict.items():
            num_instance = len(seq_items) - high_order
            user_list.extend([user] * num_instance)
            if high_order == 1:
                # One recent item per instance (a scalar, not a list).
                r_items = [seq_items[idx] for idx in range(num_instance)]
            else:
                r_items = [
                    seq_items[idx:][:high_order] for idx in range(num_instance)
                ]

            recent_items_list.extend(r_items)
            next_item_list.extend(seq_items[high_order:])

        self.ui_interactions = DataIterator(user_list,
                                            recent_items_list,
                                            next_item_list,
                                            batch_size=1,
                                            shuffle=self.shuffle,
                                            drop_last=False)
Пример #18
0
    def __iter__(self):
        """Yield (users, pos_items, neg_items) batches, resampling the
        pairwise training triples at the start of every epoch."""
        users, pos_items, neg_items = _pairwise_sampling_v2(
            self.user_pos_dict, self.num_trainings, self.item_num)
        yield from DataIterator(users,
                                pos_items,
                                neg_items,
                                batch_size=self.batch_size,
                                shuffle=self.shuffle,
                                drop_last=self.drop_last)
Пример #19
0
    def __iter__(self):
        """Yield (users, pos_items, neg_items) batches from uniformly sampled
        users; `neg_num` negatives per positive may exceed 1."""
        users, pos_items, neg_items = _pairwise_sampling_v3(
            self.user_pos_dict, self.num_trainings, self.item_num, self.neg_num)
        yield from DataIterator(users,
                                pos_items,
                                neg_items,
                                batch_size=self.batch_size,
                                shuffle=self.shuffle,
                                drop_last=self.drop_last)
Пример #20
0
    def __iter__(self):
        """Yield (users, recent_items, next_items, labels) pointwise batches.

        Fresh negatives are sampled each epoch, transposed so that each full
        round of negatives follows the positives, then concatenated after the
        positive next-items to line up with `self.all_labels`.
        """
        neg_items_list = _sampling_negative_items(self.user_pos_len, self.neg_num,
                                                  self.item_num, self.user_pos_dict)

        negs = np.reshape(np.array(neg_items_list, dtype=np.int32).T, [-1]).tolist()
        yield from DataIterator(self.users_list,
                                self.recent_items_list,
                                self.pos_items_list + negs,
                                self.all_labels,
                                batch_size=self.batch_size,
                                shuffle=self.shuffle,
                                drop_last=self.drop_last)
Пример #21
0
Файл: run.py Проект: mnskim/saqa
 def build_dev_iterator():
     """Build the dev-set DataIterator; attaches the precomputed BERT
     context/question buckets when BERT features are enabled."""
     common_args = (dev_buckets,
                    config.batch_size,
                    para_limit,
                    ques_limit,
                    config.char_limit,
                    False,
                    config.sent_limit)
     if config.bert:
         return DataIterator(*common_args,
                             bert=True,
                             bert_buckets=list(zip(dev_context_buckets,
                                                   dev_ques_buckets)))
     return DataIterator(*common_args, bert=False)
Пример #22
0
def train_script(model):
    """Train `model` with BCE-with-logits loss over positive/negative logits.

    Args:
        model: A sequence model exposing `get_train_data()`, `item_emb`,
            hyper-parameters (`lr`, `epochs`, `batch_size`, `l2_emb`), a
            device `dev`, and `evaluate_model()`/`logger` for reporting.

    Side effects:
        Moves `model` to its device, updates its parameters in place, and
        logs evaluation results every 100 epochs.
    """
    bce_criterion = torch.nn.BCEWithLogitsLoss()
    adam_optimizer = torch.optim.Adam(model.parameters(),
                                      lr=model.lr,
                                      betas=(0.9, 0.98))

    model = model.to(model.dev)
    for epoch in range(model.epochs):
        item_seq_list, item_pos_list, item_neg_list = model.get_train_data()
        data = DataIterator(item_seq_list,
                            item_pos_list,
                            item_neg_list,
                            batch_size=model.batch_size,
                            shuffle=True)
        for seq, pos, neg in data:
            seq, pos, neg = np.array(seq), np.array(pos), np.array(neg)

            # Third output is unused here; renamed from `all`, which
            # shadowed the builtin of the same name.
            pos_logits, neg_logits, _all_logits = model(seq, pos, neg)
            pos_labels = torch.ones(pos_logits.shape, device=model.dev)
            neg_labels = torch.zeros(neg_logits.shape, device=model.dev)

            adam_optimizer.zero_grad()
            # Only non-padding positions (pos != 0) contribute to the loss.
            indices = np.where(pos != 0)
            loss = bce_criterion(pos_logits[indices], pos_labels[indices])
            loss += bce_criterion(neg_logits[indices], neg_labels[indices])
            # L2 regularization on the item embedding table.
            for param in model.item_emb.parameters():
                loss += model.l2_emb * torch.norm(param)
            loss.backward()
            adam_optimizer.step()
        if epoch % 100 == 0:
            result = model.evaluate_model()
            model.logger.info("epoch %d:\t%s" % (epoch, result))
Пример #23
0
    def evaluate(self, model):
        """Leave-one-out evaluation: every user must have exactly one test item.

        `model.predict(users, candidate_items)` must return a (B, N) score
        matrix. Returns a single tab-separated string of averaged metrics at
        the cutoffs selected by `self.top_show`.
        """
        # B: batch size
        # N: the number of items
        test_users = DataIterator(list(self.user_pos_test.keys()), batch_size=self.batch_size, shuffle=False, drop_last=False)
        batch_result = []
        for batch_users in test_users:
            if self.user_neg_test is not None:
                # Rank the single positive against sampled negatives; the
                # positive is placed first, so its column index is 0.
                candidate_items = []
                for user in batch_users:
                    num_item = len(self.user_pos_test[user])
                    if num_item != 1:
                        raise ValueError("the number of test item of user %d is %d" % (user, num_item))
                    candidate_items.append([self.user_pos_test[user][0]] + self.user_neg_test[user])
                test_items = [0] * len(batch_users)
                ranking_score = model.predict(batch_users, candidate_items)  # (B,N)
                ranking_score = np.array(ranking_score)
            else:
                # Rank the single positive against the entire item catalogue.
                test_items = []
                for user in batch_users:
                    num_item = len(self.user_pos_test[user])
                    if num_item != 1:
                        raise ValueError("the number of test item of user %d is %d" % (user, num_item))
                    test_items.append(self.user_pos_test[user][0])
                ranking_score = model.predict(batch_users, None)  # (B,N)
                ranking_score = np.array(ranking_score)

                # set the ranking scores of training items to -inf,
                # then the training items will be sorted at the end of the ranking list.
                for idx, user in enumerate(batch_users):
                    train_items = self.user_pos_train[user]
                    ranking_score[idx][train_items] = -np.inf

            result = eval_score_matrix_loo(ranking_score, test_items, top_k=self.max_top, thread_num=None)  # (B,k*metric_num)
            batch_result.append(result)

        # concatenate the batch results to a matrix
        all_user_result = np.concatenate(batch_result, axis=0)
        final_result = np.mean(all_user_result, axis=0)  # mean

        # Keep only the requested cutoffs (`top_show` holds 1-indexed k values).
        final_result = np.reshape(final_result, newshape=[self.metrics_num, self.max_top])
        final_result = final_result[:, self.top_show-1]
        final_result = np.reshape(final_result, newshape=[-1])
        buf = '\t'.join([("%.8f" % x).ljust(12) for x in final_result])
        return buf
Пример #24
0
    def predict(self, users, items):
        """Predict item scores for `users` from their recent training items.

        The session-graph network runs at a fixed batch size, so a short
        final batch is padded by repeating its last row; the padding rows'
        outputs are discarded before collecting results. If `items` is given,
        only each user's listed items are returned instead of full rows.
        """
        users = DataIterator(users,
                             batch_size=self.batch_size,
                             shuffle=False,
                             drop_last=False)
        all_ratings = []
        for bat_user in users:
            cur_batch_size = len(bat_user)
            # The most recent `max_seq_len` training items form each session.
            bat_items = [
                self.user_pos_train[user][-self.max_seq_len:]
                for user in bat_user
            ]
            bat_adj_in, bat_adj_out, bat_alias, bat_items, bat_mask = self._build_session_graph(
                bat_items)
            if cur_batch_size < self.batch_size:  # padding
                # Repeat the last row of every graph tensor up to batch_size.
                pad_size = self.batch_size - cur_batch_size
                bat_adj_in = np.concatenate(
                    [bat_adj_in, [bat_adj_in[-1]] * pad_size], axis=0)
                bat_adj_out = np.concatenate(
                    [bat_adj_out, [bat_adj_out[-1]] * pad_size], axis=0)
                bat_alias = np.concatenate(
                    [bat_alias, [bat_alias[-1]] * pad_size], axis=0)
                bat_items = np.concatenate(
                    [bat_items, [bat_items[-1]] * pad_size], axis=0)
                bat_mask = np.concatenate(
                    [bat_mask, [bat_mask[-1]] * pad_size], axis=0)

            feed = {
                self.item_ph: bat_items,
                self.adj_in_ph: bat_adj_in,
                self.adj_out_ph: bat_adj_out,
                self.alias_ph: bat_alias,
                self.mask_ph: bat_mask
            }
            bat_ratings = self.sess.run(self.all_logits, feed_dict=feed)
            # Drop the rows that were only added as padding.
            all_ratings.extend(bat_ratings[:cur_batch_size])
        all_ratings = np.array(all_ratings)
        if items is not None:
            all_ratings = [
                all_ratings[idx][u_item] for idx, u_item in enumerate(items)
            ]

        return all_ratings
Пример #25
0
    def predict(self, user_ids, items=None):
        """Score items for `user_ids` from each user's most recent item.

        Returns full score rows, or — when `items` is given — only each
        user's listed items.
        """
        batches = DataIterator(user_ids, batch_size=64, shuffle=False,
                               drop_last=False)
        ratings = []
        for bat_user in batches:
            recent = [self.train_dict[u][-1] for u in bat_user]
            ratings.append(self.sess.run(
                self.prediction,
                feed_dict={self.user_input: bat_user,
                           self.item_input_recent: recent}))
        ratings = np.vstack(ratings)

        if items is not None:
            ratings = [ratings[idx][item] for idx, item in enumerate(items)]

        return ratings
Пример #26
0
 def predict(self, users, items=None):
     """Score all items per user from their pre-padded training sequence;
     optionally gather only each user's listed items."""
     batches = DataIterator(users, batch_size=512, shuffle=False,
                            drop_last=False)
     scores = []
     for bat_user in batches:
         # Pre-pad/truncate each history to the model's fixed length.
         seqs = pad_sequences([self.user_pos_train[u] for u in bat_user],
                              value=self.items_num,
                              max_len=self.max_len,
                              padding='pre',
                              truncating='pre')
         scores.extend(self.sess.run(self.all_logits,
                                     feed_dict={self.item_seq_ph: seqs,
                                                self.is_training: False}))
     scores = np.array(scores, dtype=np.float32)
     if items is not None:
         scores = [scores[idx][item] for idx, item in enumerate(items)]
     return scores
Пример #27
0
def predict_script(model, users, items=None):
    """Score items for `users` with a PyTorch sequential recommender.

    Args:
        model: trained model exposing `user_pos_train`, `items_num`,
            `max_len` and a forward `(seq, pos, neg) -> (_, _, ratings)`.
        users: iterable of user ids.
        items: optional per-user candidate item lists; when given, only those
            columns of each user's rating row are returned.

    Returns:
        A list of per-user rating NumPy arrays, aligned with `users`.

    NOTE(review): negatives are sampled randomly per call, so if the forward
    pass lets `bat_neg` influence `bat_ratings`, predictions are
    nondeterministic — confirm against the model's forward definition.
    """
    user_iter = DataIterator(users, batch_size=512, shuffle=False,
                             drop_last=False)
    all_ratings = []
    for bat_user in user_iter:
        # Model input: each user's full history, left-padded so the most
        # recent items are kept when truncating.
        bat_seq = pad_sequences([model.user_pos_train[u] for u in bat_user],
                                value=model.items_num,
                                max_len=model.max_len,
                                padding='pre',
                                truncating='pre')
        # The forward signature requires positive/negative targets even at
        # inference time, so build them the same way training does.
        bat_pos = [model.user_pos_train[u][1:] for u in bat_user]
        n_neg_items = [len(pos) for pos in bat_pos]
        exclusion = [model.user_pos_train[u] for u in bat_user]
        bat_neg = batch_randint_choice(model.items_num,
                                       n_neg_items,
                                       replace=True,
                                       exclusion=exclusion)

        bat_pos = pad_sequences(bat_pos,
                                value=model.items_num,
                                max_len=model.max_len,
                                padding='pre',
                                truncating='pre')
        bat_neg = pad_sequences(bat_neg,
                                value=model.items_num,
                                max_len=model.max_len,
                                padding='pre',
                                truncating='pre')

        _, _x, bat_ratings = model(bat_seq, bat_pos, bat_neg)
        all_ratings.extend(bat_ratings)

    # Detach each row from the autograd graph and move it to host NumPy.
    all_ratings = [t.detach().cpu().numpy() for t in all_ratings]
    if items is not None:
        all_ratings = [
            all_ratings[idx][item] for idx, item in enumerate(items)
        ]
    return all_ratings
Пример #28
0
    def _sample_negative(self, users_list):
        """Sample `self.neg_samples` negative items per occurrence of each
        user in `users_list`, excluding items the user has interacted with.

        Returns:
            A list of arrays shaped [self.neg_samples], one entry per element
            of `users_list` (grouped by user, repeated `count` times).
        """
        uniq_users, counts = np.unique(users_list, return_counts=True)
        neg_by_user = {}
        pair_iter = DataIterator(uniq_users,
                                 counts,
                                 batch_size=1024,
                                 shuffle=False)
        for bat_users, bat_counts in pair_iter:
            # One draw of count * neg_samples items per user, reshaped later.
            sizes = [c * self.neg_samples for c in bat_counts]
            seen = [self.user_pos_train[u] for u in bat_users]
            sampled = batch_randint_choice(self.items_num,
                                           sizes,
                                           replace=True,
                                           exclusion=seen)
            neg_by_user.update(zip(bat_users, sampled))

        neg_items_list = []
        for u, c in zip(uniq_users, counts):
            neg_items_list.extend(
                np.reshape(neg_by_user[u], newshape=[c, self.neg_samples]))
        return neg_items_list
Пример #29
0
    def predict(self, users, items=None):
        """Return all-item logits for each user based on that user's
        precomputed test sequence (`self.user_test_seq`).

        Args:
            users: iterable of user ids.
            items: optional per-user candidate item lists; when given, each
                user's row is restricted to those items.

        Returns:
            float32 NumPy array of logits, or a list of per-user candidate
            arrays when `items` is given.
        """
        batches = DataIterator(users,
                               batch_size=512,
                               shuffle=False,
                               drop_last=False)
        ratings = []
        for bat in batches:
            feed = {
                self.user_ph: bat,
                self.item_seq_ph: [self.user_test_seq[u] for u in bat],
                self.is_training: False
            }
            ratings.extend(self.sess.run(self.all_logits, feed_dict=feed))
        ratings = np.array(ratings, dtype=np.float32)

        if items is not None:
            ratings = [ratings[row][cand] for row, cand in enumerate(items)]

        return ratings
Пример #30
0
    def get_train_data(self):
        """Build next-item training triples for every user.

        For each user: the model input is the history minus its last item,
        the positives are the history shifted left by one, and negatives are
        sampled uniformly outside the user's history. All three are
        left-padded / left-truncated to `self.max_len`, using
        `self.items_num` as the padding id.

        Returns:
            (item_seq_list, item_pos_list, item_neg_list) — three parallel
            lists of padded sequences.
        """
        seq_list, pos_list, neg_list = [], [], []

        def _pad(batch):
            # Left-pad so the most recent interactions are always kept.
            return pad_sequences(batch,
                                 value=self.items_num,
                                 max_len=self.max_len,
                                 padding='pre',
                                 truncating='pre')

        user_iter = DataIterator(list(self.user_pos_train.keys()),
                                 batch_size=1024,
                                 shuffle=False)
        for bat_users in user_iter:
            seqs = [self.user_pos_train[u][:-1] for u in bat_users]
            poss = [self.user_pos_train[u][1:] for u in bat_users]
            negs = batch_randint_choice(
                self.items_num,
                [len(p) for p in poss],
                replace=True,
                exclusion=[self.user_pos_train[u] for u in bat_users])

            seq_list.extend(_pad(seqs))
            pos_list.extend(_pad(poss))
            neg_list.extend(_pad(negs))

        return seq_list, pos_list, neg_list