Example #1
 def l2(self, out_dict):
     """
     模型l2计算,默认是所有参数(除了embedding之外)的平方和,
     Embedding 的 L2是 只计算当前batch用到的
     :return:
     
     Compute the l2 term of the model, by default it's the square sum of all parameters (except for embedding)
     The l2 norm of embedding only consider those embeddings used in the current batch
     :return:
     """
     l2 = utils.numpy_to_torch(np.array(0.0, dtype=np.float32), gpu=True)
     for name, p in self.named_parameters():
         if not p.requires_grad:
             continue
         if self.l2_bias == 0 and 'bias' in name:
             continue
         if name.split('.')[0] in self.l2_embeddings:
             continue
         l2 += (p ** 2).sum()
     b_l2 = utils.numpy_to_torch(np.array(0.0, dtype=np.float32), gpu=True)
     for p in out_dict[EMBEDDING_L2]:
         b_l2 += (p ** 2).sum()
     if self.loss_sum == 0:
         l2_batch = out_dict[TOTAL_BATCH_SIZE] if L2_BATCH not in out_dict else out_dict[L2_BATCH]
         b_l2 = b_l2 / l2_batch
     return l2 + b_l2
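Every snippet in this collection funnels NumPy data through utils.numpy_to_torch. Its definition is not included in these excerpts; judging from the call sites (a gpu flag, tensors that end up on CUDA by default), a minimal sketch might look like the following. The body is an assumption for illustration, not the projects' actual helper:

    import numpy as np
    import torch

    def numpy_to_torch(d, gpu=True):
        # hypothetical sketch: wrap a NumPy array in a torch tensor and,
        # by default, move it to the GPU when one is available
        t = torch.from_numpy(np.asarray(d))
        if gpu and torch.cuda.is_available():
            t = t.cuda()
        return t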
Example #2
 def _get_feed_dict_rk(self,
                       data,
                       batch_start,
                       batch_size,
                       train,
                       neg_data=None):
     if not train:
         feed_dict = self._get_feed_dict_rt(data=data,
                                            batch_start=batch_start,
                                            batch_size=batch_size,
                                            train=train)
         feed_dict['rank'] = 1
     else:
         batch_end = min(len(data['X']), batch_start + batch_size)
         real_batch_size = batch_end - batch_start
         neg_columns_dict = {}
         if neg_data is None:
             logging.warning('neg_data is None')
             neg_df = self.generate_neg_df(
                 uid_list=data['uid'][batch_start:batch_start +
                                      real_batch_size],
                 iid_list=data['iid'][batch_start:batch_start +
                                      real_batch_size],
                 df=self.data_loader.train_df,
                 neg_n=1,
                 train=True)
             neg_data = self.format_data_dict(neg_df)
             for c in self.data_columns:
                 neg_columns_dict[c] = neg_data[c]
         else:
             for c in self.data_columns:
                 neg_columns_dict[c] = neg_data[c][batch_start:batch_start +
                                                   real_batch_size]
         y = np.concatenate([
             np.ones(shape=real_batch_size, dtype=np.float32),
             np.zeros(shape=real_batch_size, dtype=np.float32)
         ])
         sample_id = data[global_p.K_SAMPLE_ID][batch_start:batch_start +
                                                real_batch_size]
         neg_sample_id = sample_id + len(self.train_data['Y'])
         feed_dict = {
             'train': train,
             'rank': 1,
             'Y': utils.numpy_to_torch(y),
             global_p.K_SAMPLE_ID:
             np.concatenate([sample_id, neg_sample_id])
         }
         for c in self.data_columns:
             feed_dict[c] = utils.numpy_to_torch(
                 np.concatenate([
                     data[c][batch_start:batch_start + real_batch_size],
                     neg_columns_dict[c]
                 ]))
     return feed_dict
Example #3
 def get_feed_dict(self, corpus, data, batch_start, batch_size, phase):
     feed_dict = BaseModel.get_feed_dict(self, corpus, data, batch_start,
                                         batch_size, phase)
     real_batch_size = feed_dict['batch_size']
     history_items = data['item_his'][batch_start:batch_start +
                                      real_batch_size].values
     lengths = data['his_length'][batch_start:batch_start +
                                  real_batch_size].values
     feed_dict['history_items'] = utils.numpy_to_torch(
         utils.pad_lst(history_items))
     feed_dict['lengths'] = utils.numpy_to_torch(lengths)
     return feed_dict
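utils.pad_lst is likewise not shown. Since it turns a batch of variable-length item histories into a single tensor, a plausible sketch (assumed, not taken from the source) is zero right-padding to the longest list, mirroring the manual padding done in Examples #17 and #24:

    import numpy as np

    def pad_lst(lst, value=0):
        # hypothetical sketch: right-pad each variable-length list with
        # `value` so the batch forms one rectangular int64 matrix
        max_len = max(len(x) for x in lst)
        return np.array([list(x) + [value] * (max_len - len(x)) for x in lst],
                        dtype=np.int64)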
Example #4
 def _get_feed_dict_rt(self, data, batch_start, batch_size, train):
     """
     generate a batch for rating/clicking prediction
     :param data: data dict,generated by self.get_*_data() and self.format_data_dict()
     :param batch_start: start index of current batch
     :param batch_size: batch size
     :param train: train or validation/test
     :return: batch的feed dict
     """
     batch_end = min(len(data['X']), batch_start + batch_size)
     real_batch_size = batch_end - batch_start
     feed_dict = {
         'train': train,
         'rank': 0,
         global_p.K_SAMPLE_ID:
             data[global_p.K_SAMPLE_ID][batch_start:batch_start + real_batch_size]
     }
     if 'Y' in data:
         feed_dict['Y'] = utils.numpy_to_torch(
             data['Y'][batch_start:batch_start + real_batch_size])
     else:
         feed_dict['Y'] = utils.numpy_to_torch(
             np.zeros(shape=real_batch_size))
     for c in self.data_columns:
         d = data[c][batch_start:batch_start + real_batch_size]
         if c == global_p.C_HISTORY and self.sparse_his == 1:
             x, y = [], []
             for idx, iids in enumerate(d):
                 x.extend([idx] * len(iids))
                 y.extend(iids)
             if len(x) <= 0:
                 i = torch.LongTensor([[0], [0]])
                 v = torch.FloatTensor([0.0])
             else:
                 i = torch.LongTensor([x, y])
                 v = torch.FloatTensor([1.0] * len(x))
             history = torch.sparse.FloatTensor(
                 i, v,
                 torch.Size([real_batch_size, self.data_loader.item_num]))
             if torch.cuda.device_count() > 0:
                 history = history.cuda()
             feed_dict[c] = history
         else:
             feed_dict[c] = utils.numpy_to_torch(d)
     return feed_dict
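The sparse branch above encodes each user's history as one row of a multi-hot matrix in COO form: x holds the batch index of every interaction, y the item id, and the values are all 1.0. torch.sparse.FloatTensor is the legacy constructor; a standalone illustration with the modern torch.sparse_coo_tensor equivalent:

    import torch

    # toy batch: 2 users over an item vocabulary of size 5;
    # user 0 interacted with items {1, 3}, user 1 with item {2}
    rows = [0, 0, 1]   # x: batch index of each interaction
    cols = [1, 3, 2]   # y: item id of each interaction
    history = torch.sparse_coo_tensor(
        torch.tensor([rows, cols]), torch.ones(3), size=(2, 5))
    print(history.to_dense())
    # tensor([[0., 1., 0., 1., 0.],
    #         [0., 0., 1., 0., 0.]])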
Example #5
 def _init_weights(self):
     self.iid_embeddings = torch.nn.Embedding(self.item_num,
                                              self.ui_vector_size)
     self.uid_embeddings = torch.nn.Embedding(self.user_num,
                                              self.ui_vector_size)
     self.true = torch.nn.Parameter(utils.numpy_to_torch(
         np.random.uniform(0, 0.1,
                           size=self.ui_vector_size).astype(np.float32)),
                                    requires_grad=False)
     self.not_layer_1 = torch.nn.Linear(self.ui_vector_size,
                                        self.ui_vector_size)
     self.not_layer_2 = torch.nn.Linear(self.ui_vector_size,
                                        self.ui_vector_size)
     self.and_layer_1 = torch.nn.Linear(2 * self.ui_vector_size,
                                        self.ui_vector_size)
     self.and_layer_2 = torch.nn.Linear(self.ui_vector_size,
                                        self.ui_vector_size)
     self.or_layer_1 = torch.nn.Linear(2 * self.ui_vector_size,
                                       self.ui_vector_size)
     self.or_layer_2 = torch.nn.Linear(self.ui_vector_size,
                                       self.ui_vector_size)
     self.purchase_layer_1 = torch.nn.Linear(2 * self.ui_vector_size,
                                             self.ui_vector_size)
     self.purchase_layer_2 = torch.nn.Linear(self.ui_vector_size,
                                             self.ui_vector_size)
Example #6
    def get_feed_dict(self, corpus, data, batch_start, batch_size, phase):
        batch_end = min(len(data), batch_start + batch_size)
        real_batch_size = batch_end - batch_start
        user_ids = data['user_id'][batch_start:batch_start +
                                   real_batch_size].values
        item_ids = data['item_id'][batch_start:batch_start +
                                   real_batch_size].values
        history_items = data['item_his'][batch_start:batch_start +
                                         real_batch_size].values
        history_times = data['time_his'][batch_start:batch_start +
                                         real_batch_size].values
        times = data['time'][batch_start:batch_start + real_batch_size].values

        neg_items = self.get_neg_items(corpus, data, batch_start,
                                       real_batch_size, phase)
        item_ids = np.concatenate([np.expand_dims(item_ids, -1), neg_items],
                                  axis=1)

        # Find information related to the target item:
        # - category id
        # - time intervals w.r.t. recent relational interactions (-1 if not existing)
        category_ids = np.array([[self.item2cate[x] for x in candidate_lst]
                                 for candidate_lst in item_ids])
        relational_intervals = list()
        for r_idx in range(0, self.relation_num):
            intervals = np.ones_like(item_ids) * -1.
            for i, candidate_lst in enumerate(item_ids):
                for j, target_item in enumerate(candidate_lst):
                    for k in range(len(history_items[i]))[::-1]:
                        if (history_items[i][k], r_idx,
                                target_item) in corpus.triplet_set:
                            intervals[i][j] = times[i] - history_times[i][k]
                            break
            relational_intervals.append(intervals)
        relational_intervals = np.stack(relational_intervals,
                                        axis=2) / self.time_scalar

        feed_dict = {
            'user_id': utils.numpy_to_torch(user_ids),  # [batch_size]
            'item_id': utils.numpy_to_torch(item_ids),  # [batch_size, -1]
            'category_id': utils.numpy_to_torch(category_ids),  # [batch_size, -1]
            'relational_interval':
                utils.numpy_to_torch(relational_intervals),  # [batch_size, -1, relation_num]
            'batch_size': real_batch_size
        }
        return feed_dict
Example #7
 def loss(self, feed_dict, predictions):
     if self.stage == 1:
         real_batch_size = feed_dict['batch_size']
         pos_pred, neg_pred = predictions[:real_batch_size * 2], predictions[real_batch_size * 2:]
         loss = self.kg_loss(pos_pred, neg_pred, utils.numpy_to_torch(np.ones(real_batch_size * 2)))
     else:
         loss = BaseModel.loss(self, feed_dict, predictions)
     return loss
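The three-argument call pattern loss_fn(pos_pred, neg_pred, ones) seen here and in Examples #10, #11 and #30 matches torch.nn.MarginRankingLoss, where a target of +1 pushes pos_pred above neg_pred by at least the margin (Example #18 also stores an args.margin). How kg_loss/loss_function is actually constructed is not shown in these excerpts, so treating it as MarginRankingLoss is an assumption:

    import numpy as np
    import torch

    # assumed construction; the projects may configure the margin differently
    kg_loss = torch.nn.MarginRankingLoss(margin=1.0)
    pos_pred = torch.tensor([2.0, 1.5])
    neg_pred = torch.tensor([0.5, 1.8])
    target = torch.from_numpy(np.ones(2, dtype=np.float32))  # +1: pos ranks higher
    print(kg_loss(pos_pred, neg_pred, target))
    # mean of max(0, margin - (pos - neg)) = (0 + 1.3) / 2 = 0.65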
Example #8
 def get_feed_dict(self, corpus, data, batch_start, batch_size, phase):
     feed_dict = BaseModel.get_feed_dict(self, corpus, data, batch_start,
                                         batch_size, phase)
     real_batch_size = feed_dict['batch_size']
     user_ids = data['user_id'][batch_start:batch_start +
                                real_batch_size].values
     feed_dict['user_id'] = utils.numpy_to_torch(user_ids)  # [batch_size]
     return feed_dict
Example #9
 def get_feed_dict(self, corpus, data, batch_start, batch_size, phase):
     feed_dict = BPR.get_feed_dict(self, corpus, data, batch_start,
                                   batch_size, phase)
     real_batch_size = feed_dict['batch_size']
     times = data['time'][batch_start:batch_start + real_batch_size].values
     time_ids = (times - self.min_time) // self.time_bin_width
     feed_dict['time_id'] = utils.numpy_to_torch(time_ids).long()
     return feed_dict
Example #10
 def loss(self, feed_dict, predictions):
     real_batch_size = feed_dict['batch_size']
     predictions = predictions.flatten()
     pos_pred = predictions[:real_batch_size * 2]
     neg_pred = predictions[real_batch_size * 2:]
     loss = self.loss_function(
         pos_pred, neg_pred,
         utils.numpy_to_torch(np.ones(real_batch_size * 2)))
     return loss.double()
Example #11
 def loss(self, predictions):
     if self.stage == 1:
         batch_size = predictions.shape[0]
         pos_pred = predictions[:, :2].flatten()
         neg_pred = predictions[:, 2:].flatten()
         loss = self.kg_loss(pos_pred, neg_pred,
                             utils.numpy_to_torch(np.ones(batch_size * 2)))
     else:
         loss = super().loss(predictions)
     return loss
Example #12
    def get_feed_dict(self, corpus, data, batch_start, batch_size, phase):
        batch_end = min(len(data), batch_start + batch_size)
        real_batch_size = batch_end - batch_start

        if phase == 'train':
            head_ids = data['head'][batch_start:batch_start +
                                    real_batch_size].values
            tail_ids = data['tail'][batch_start:batch_start +
                                    real_batch_size].values
            relation_ids = data['relation'][batch_start:batch_start +
                                            real_batch_size].values
            neg_heads, neg_tails = self.sample_negative_triplet(
                corpus, real_batch_size, head_ids, tail_ids, relation_ids)
            head_ids = head_ids + (relation_ids > 0).astype(int) * self.user_num
            neg_heads = neg_heads + (relation_ids > 0).astype(int) * self.user_num
            head_ids = np.concatenate(
                (head_ids, head_ids, head_ids, neg_heads))
            tail_ids = np.concatenate(
                (tail_ids, tail_ids, neg_tails, tail_ids))
            tail_ids = np.expand_dims(tail_ids, -1)
            relation_ids = np.tile(relation_ids, 4)
        else:
            head_ids = data['user_id'][batch_start:batch_start +
                                       real_batch_size].values
            item_ids = data['item_id'][batch_start:batch_start +
                                       real_batch_size].values
            neg_items = data['neg_items'][batch_start:batch_start +
                                          real_batch_size].tolist()
            tail_ids = np.concatenate(
                [np.expand_dims(item_ids, -1), neg_items], axis=1)
            relation_ids = np.zeros_like(head_ids)
        tail_ids = tail_ids + self.user_num

        feed_dict = {
            'head_id': utils.numpy_to_torch(head_ids),  # [batch_size]
            'relation_id': utils.numpy_to_torch(relation_ids),  # [batch_size]
            'tail_id': utils.numpy_to_torch(tail_ids),  # [batch_size, -1]
            'batch_size': real_batch_size
        }
        return feed_dict
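In the training branch, each positive triplet is laid out four times: twice unchanged, once with a corrupted tail, and once with a corrupted head, so a downstream pairwise loss can compare every positive against both corruption types. A toy run of the same concatenation (ids are hypothetical):

    import numpy as np

    head_ids = np.array([7])    # one positive triplet (7, r, 3)
    tail_ids = np.array([3])
    neg_heads = np.array([9])   # sampled corruptions
    neg_tails = np.array([5])
    heads = np.concatenate((head_ids, head_ids, head_ids, neg_heads))
    tails = np.concatenate((tail_ids, tail_ids, neg_tails, tail_ids))
    print([(int(h), int(t)) for h, t in zip(heads, tails)])
    # [(7, 3), (7, 3), (7, 5), (9, 3)] -> pos, pos, tail-corrupted, head-corrupted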
Example #13
    def get_feed_dict(self, *args, **kwargs):
        feed_dict = HistoryDP.get_feed_dict(self, *args, **kwargs)

        total_batch_size = feed_dict[TOTAL_BATCH_SIZE]
        # anchor_users
        if self.anchor_users is not None:
            uids = list(feed_dict[UID].cpu().numpy())
            anchor_uids = [
                self.anchor_users[i] if i in self.anchor_users else -1
                for i in uids
            ]
            feed_dict[K_ANCHOR_USER] = utils.numpy_to_torch(np.array(
                anchor_uids, dtype=np.int64),
                                                            gpu=False)
            # print(self.anchor_users)
            # print(feed_dict[K_ANCHOR_USER])
            # assert 1 == 2
        else:
            feed_dict[K_ANCHOR_USER] = utils.numpy_to_torch(
                -np.ones(total_batch_size, dtype=np.int64), gpu=False)
        return feed_dict
Example #14
 def l2(self):
     # calculate l2 of a batch manually for observation
     l2 = utils.numpy_to_torch(np.array(0.0, dtype=np.float64), gpu=True)
     for name, p in filter(lambda x: x[1].requires_grad,
                           self.named_parameters()):
         if ('bias' not in name) and (name.split('.')[0]
                                      not in self.embeddings):
             l2 += (p**2).sum()
     # only include embeddings utilized in the current batch
     for p in self.embedding_l2:
         l2 += (p**2).sum() / p.shape[0]
     return l2
Example #15
File: BaseModel.py  Project: ZSCDumin/NLR
 def l2(self, out_dict):
     """
     模型l2计算,默认是所有参数(除了embedding之外)的平方和,
     Embedding 的 L2是 只计算当前batch用到的
     :return:
     """
     l2 = utils.numpy_to_torch(np.array(0.0, dtype=np.float32), gpu=True)
     for name, p in self.named_parameters():
         if not p.requires_grad:
             continue
         if self.l2_bias == 0 and 'bias' in name:
             continue
         if name.split('.')[0] in self.l2_embeddings:
             continue
         l2 += (p ** 2).sum()
     b_l2 = utils.numpy_to_torch(np.array(0.0, dtype=np.float32), gpu=True)
     for p in out_dict[EMBEDDING_L2]:
         b_l2 += (p ** 2).sum()
     if self.loss_sum == 0:
         l2_batch = out_dict[TOTAL_BATCH_SIZE] if L2_BATCH not in out_dict else out_dict[L2_BATCH]
         b_l2 = b_l2 / l2_batch
     return l2 + b_l2
Example #16
 def _get_feed_dict_rt(self, data, batch_start, batch_size, train):
     batch_end = min(len(data['X']), batch_start + batch_size)
     real_batch_size = batch_end - batch_start
     feed_dict = {
         'train': train,
         'rank': 0,
         global_p.K_SAMPLE_ID:
             data[global_p.K_SAMPLE_ID][batch_start:batch_start + real_batch_size]
     }
     if 'Y' in data:
         feed_dict['Y'] = utils.numpy_to_torch(
             data['Y'][batch_start:batch_start + real_batch_size])
     else:
         feed_dict['Y'] = utils.numpy_to_torch(
             np.zeros(shape=real_batch_size))
     for c in self.data_columns:
         feed_dict[c] = utils.numpy_to_torch(
             data[c][batch_start:batch_start + real_batch_size])
     return feed_dict
Example #17
    def get_feed_dict(self,
                      data,
                      batch_start,
                      batch_size,
                      train,
                      neg_data=None,
                      special_cols=None):
        """
        topn模型产生一个batch,如果是训练需要对每个正样本采样一个负样本,保证每个batch前一半是正样本,后一半是对应的负样本
        :param data: data dict,由self.get_*_data()和self.format_data_dict()系列函数产生
        :param batch_start: batch开始的index
        :param batch_size: batch大小
        :param train: 训练还是测试
        :param neg_data: 负例的data dict,如果已经有可以传入拿来用
        :param special_cols: 需要特殊处理的column
        :return: batch的feed dict
        
        topn model will produce a batch, if doing training then need to sample a negative example for each positive example, and garanttee that for each batch the first half are positive examples and the second half are negative examples
        :param data: data dict, produced by self.get_*_data() and self.format_data_dict() functions
        :param batch_start: starting index of the batch
        :param batch_size: batch size
        :param train: training or testing
        :param neg_data: data dict of negative examples, if alreay exist can use directly
        :param special_cols: columns that need special treatment
        :return: feed dict of the batch
        """
        feed_dict = DataProcessor.get_feed_dict(
            self,
            data,
            batch_start,
            batch_size,
            train,
            neg_data=neg_data,
            special_cols=[C_HISTORY, C_HISTORY_NEG] if special_cols is None
            else [C_HISTORY, C_HISTORY_NEG] + special_cols)

        assert C_HISTORY_NEG not in feed_dict
        d = [[i for i in x] for x in feed_dict[C_HISTORY]]
        if train and self.shuffle_his:
            d = [
                list(np.random.choice(x, len(x), replace=False))
                if len(x) != 0 else [] for x in d
            ]

        lengths = [len(iids) for iids in d]
        max_length = max(lengths)
        new_d = np.array([x + [0] * (max_length - len(x)) for x in d])

        feed_dict[C_HISTORY] = utils.numpy_to_torch(new_d, gpu=False)
        feed_dict[C_HISTORY_LENGTH] = lengths
        return feed_dict
Example #18
    def __init__(self, args, corpus):
        self.stage = args.stage
        self.kg_lr = args.lr_scale * args.lr
        self.margin = args.margin
        self.base_method = args.base_method

        assert self.stage in [1, 2]
        self.pretrain_path = '../model/KG/KG__{}__emb_size={}__margin={}.pt'\
            .format(corpus.dataset, args.emb_size, self.margin)
        if self.stage == 1:
            args.model_path = self.pretrain_path

        SLRC.__init__(self, args, corpus)
        self.relation_range = utils.numpy_to_torch(np.arange(self.relation_num))
Example #19
    def kg_feed_dict(self, corpus, data, batch_start, real_batch_size):
        head_ids = data['head'][batch_start: batch_start + real_batch_size].values
        tail_ids = data['tail'][batch_start: batch_start + real_batch_size].values
        relation_ids = data['relation'][batch_start: batch_start + real_batch_size].values
        neg_tails = np.random.randint(1, self.item_num, size=real_batch_size)
        neg_heads = np.random.randint(1, self.item_num, size=real_batch_size)
        for i in range(real_batch_size):
            while (head_ids[i], relation_ids[i], neg_tails[i]) in corpus.triplet_set:
                neg_tails[i] = np.random.randint(1, self.item_num)
            while (neg_heads[i], relation_ids[i], tail_ids[i]) in corpus.triplet_set:
                neg_heads[i] = np.random.randint(1, self.item_num)
        head_ids = np.concatenate((head_ids, head_ids, head_ids, neg_heads))
        tail_ids = np.concatenate((tail_ids, tail_ids, neg_tails, tail_ids))
        relation_ids = np.tile(relation_ids, 4)

        # head and tail are swapped because the relations we want are is_complement_of and is_substitute_of,
        # which are the reverses of the original also_buy and also_view relations
        feed_dict = {
            'head_id': utils.numpy_to_torch(tail_ids),          # [batch_size]
            'tail_id': utils.numpy_to_torch(head_ids),          # [batch_size]
            'relation_id': utils.numpy_to_torch(relation_ids),  # [batch_size]
            'batch_size': real_batch_size
        }
        return feed_dict
Example #20
 def l2(self, out_dict):
     """
     模型l2计算,默认是所有参数(除了embedding之外)的平方和,
     Embedding 的 L2是 只计算当前batch用到的
     :return:
     """
     l2 = utils.numpy_to_torch(np.array(0.0, dtype=np.float32), gpu=True)
     for name, p in self.named_parameters():
         if not p.requires_grad:
             continue
         if name.split('.')[0] in self.l2_embeddings:
             continue
         l2 += (p**2).sum()
     for p in out_dict[EMBEDDING_L2]:
         l2 += (p**2).sum()
     return l2
Example #21
    def get_feed_dict(self,
                      data,
                      batch_start,
                      batch_size,
                      train,
                      neg_data=None,
                      special_cols=None):
        """
        topn模型产生一个batch,如果是训练需要对每个正样本采样一个负样本,保证每个batch前一半是正样本,后一半是对应的负样本
        :param data: data dict,由self.get_*_data()和self.format_data_dict()系列函数产生
        :param batch_start: batch开始的index
        :param batch_size: batch大小
        :param train: 训练还是测试
        :param neg_data: 负例的data dict,如果已经有可以传入拿来用
        :param special_cols: 需要特殊处理的column
        :return: batch的feed dict
        """
        feed_dict = DataProcessor.get_feed_dict(
            self,
            data,
            batch_start,
            batch_size,
            train,
            neg_data=neg_data,
            special_cols=[C_HISTORY, C_HISTORY_NEG] if special_cols is None
            else [C_HISTORY, C_HISTORY_NEG] + special_cols)

        assert C_HISTORY_NEG not in feed_dict
        d = [[i for i in x] for x in feed_dict[C_HISTORY]]
        if train and self.shuffle_his:
            d = [
                list(np.random.choice(x, len(x), replace=False))
                if len(x) != 0 else [] for x in d
            ]

        lengths = [len(iids) for iids in d]
        max_length = max(lengths)
        new_d = np.array([x + [0] * (max_length - len(x)) for x in d])

        feed_dict[C_HISTORY] = utils.numpy_to_torch(new_d, gpu=False)
        feed_dict[C_HISTORY_LENGTH] = lengths
        return feed_dict
Example #22
 def get_feed_dict(self,
                   data,
                   batch_start,
                   batch_size,
                   train,
                   neg_data=None,
                   special_cols=None):
     feed_dict = DataProcessor.get_feed_dict(self,
                                             data,
                                             batch_start,
                                             batch_size,
                                             train,
                                             neg_data=neg_data,
                                             special_cols=[X])
     x = [[i for i in s] for s in feed_dict[X]]
     if train:
         if self.shuffle_and:
             x = [[
                 list(np.random.choice(o, size=len(o), replace=False))
                 for o in s
             ] for s in x]
         if self.shuffle_or:
             for s in x:
                 np.random.shuffle(s)
     max_or_length = max([len(s) for s in x])
     for s in x:
         while len(s) < max_or_length:
             s.append([])
     max_and_length = max(
         [max([len(s[i]) for s in x]) for i in range(max_or_length)])
     x = [[
         s[i] + [0] * (max_and_length - len(s[i]))
         for i in range(max_or_length)
     ] for s in x]
     or_length = [len(i) for i in x[0]]
     feed_dict[X] = utils.numpy_to_torch(np.array(x), gpu=False)
     feed_dict[K_OR_LENGTH] = or_length
     # print(feed_dict)
     # assert 1==2
     return feed_dict
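This get_feed_dict pads a batch of nested logic expressions, where each sample is a list of OR clauses and each clause is a list of variable ids joined by AND, into a rectangular [batch, max_or_length, max_and_length] array. A toy run of the same padding logic:

    import numpy as np

    x = [[[1, 2], [3]],   # sample 0: (1 AND 2) OR (3)
         [[4]]]           # sample 1: (4)
    max_or_length = max(len(s) for s in x)
    for s in x:
        while len(s) < max_or_length:
            s.append([])  # pad missing OR clauses
    max_and_length = max(len(s[i]) for s in x for i in range(max_or_length))
    x = [[s[i] + [0] * (max_and_length - len(s[i]))
          for i in range(max_or_length)] for s in x]
    print(np.array(x).shape)  # (2, 2, 2)
    # sample 0 -> [[1, 2], [3, 0]], sample 1 -> [[4, 0], [0, 0]]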
Example #23
    def _init_weights(self):
        self.feature_embeddings = torch.nn.Embedding(self.variable_num,
                                                     self.v_vector_size)
        self.l2_embeddings = ['feature_embeddings']

        self.true = torch.nn.Parameter(utils.numpy_to_torch(
            np.random.uniform(0, 1,
                              size=[1,
                                    self.v_vector_size]).astype(np.float32)),
                                       requires_grad=False)

        self.not_layer = torch.nn.Linear(self.v_vector_size,
                                         self.v_vector_size)
        for i in range(self.layers):
            setattr(self, 'not_layer_%d' % i,
                    torch.nn.Linear(self.v_vector_size, self.v_vector_size))

        self.and_layer = torch.nn.Linear(self.v_vector_size * 2,
                                         self.v_vector_size)
        for i in range(self.layers):
            setattr(
                self, 'and_layer_%d' % i,
                torch.nn.Linear(self.v_vector_size * 2,
                                self.v_vector_size * 2))

        self.or_layer = torch.nn.Linear(self.v_vector_size * 2,
                                        self.v_vector_size)
        for i in range(self.layers):
            setattr(
                self, 'or_layer_%d' % i,
                torch.nn.Linear(self.v_vector_size * 2,
                                self.v_vector_size * 2))

        self.sim_layer = torch.nn.Linear(self.v_vector_size, 1)
        for i in range(self.layers):
            setattr(self, 'sim_layer_%d' % i,
                    torch.nn.Linear(self.v_vector_size, self.v_vector_size))
Example #24
    def get_feed_dict(self,
                      data,
                      batch_start,
                      batch_size,
                      train,
                      neg_data=None,
                      special_cols=None):
        feed_dict = DataProcessor.get_feed_dict(self,
                                                data,
                                                batch_start,
                                                batch_size,
                                                train,
                                                neg_data=neg_data,
                                                special_cols=[X])

        lengths = [len(seq) for seq in feed_dict[X]]
        max_length = max(lengths)
        new_d = np.array(
            [x + [0] * (max_length - len(x)) for x in feed_dict[X]])
        feed_dict[X] = utils.numpy_to_torch(new_d, gpu=False)
        feed_dict[K_S_LENGTH] = lengths
        # print(feed_dict)
        # assert 1 == 2
        return feed_dict
Example #25
 def get_feed_dict(self, corpus, data, batch_start, batch_size, phase):
     """
     Generate a batch of the given data, which will be fed into forward function.
     :param corpus: Loader object
     :param data: DataFrame in corpus.data_df (may be shuffled)
     :param batch_start: batch start index
     :param batch_size: batch size
     :param phase: 'train', 'dev' or 'test'
     """
     batch_end = min(len(data), batch_start + batch_size)
     real_batch_size = batch_end - batch_start
     item_ids = data['item_id'][batch_start:batch_start +
                                real_batch_size].values
     neg_items = self.get_neg_items(corpus, data, batch_start,
                                    real_batch_size,
                                    phase)  # [batch_size, num_neg]
     # concatenate ground-truth item and corresponding negative items
     item_ids = np.concatenate([np.expand_dims(item_ids, -1), neg_items],
                               axis=1)
     feed_dict = {
         'item_id': utils.numpy_to_torch(item_ids),
         'batch_size': real_batch_size
     }
     return feed_dict
Example #26
 def __init__(self, args, corpus):
     self.max_his = args.history_max
     self.num_layers = args.num_layers
     self.dropout = args.dropout
     self.len_range = utils.numpy_to_torch(np.arange(self.max_his))
     GRU4Rec.__init__(self, args, corpus)
Example #27
 def _get_feed_dict_rk(self,
                       data,
                       batch_start,
                       batch_size,
                       train,
                       neg_data=None):
     if not train:
         feed_dict = self._get_feed_dict_rt(data=data,
                                            batch_start=batch_start,
                                            batch_size=batch_size,
                                            train=train)
         feed_dict['rank'] = 1
     else:
         batch_end = min(len(data['X']), batch_start + batch_size)
         real_batch_size = batch_end - batch_start
         neg_columns_dict = {}
         if neg_data is None:
             logging.warning('neg_data is None')
             neg_df = self.generate_neg_df(
                 uid_list=data['uid'][batch_start:batch_start +
                                      real_batch_size],
                 iid_list=data['iid'][batch_start:batch_start +
                                      real_batch_size],
                 df=self.data_loader.train_df,
                 neg_n=1,
                 train=True)
             neg_data = self.format_data_dict(neg_df)
             for c in self.data_columns:
                 neg_columns_dict[c] = neg_data[c]
         else:
             for c in self.data_columns:
                 neg_columns_dict[c] = neg_data[c][batch_start:batch_start +
                                                   real_batch_size]
         y = np.concatenate([
             np.ones(shape=real_batch_size, dtype=np.float32),
             np.zeros(shape=real_batch_size, dtype=np.float32)
         ])
         sample_id = data[global_p.K_SAMPLE_ID][batch_start:batch_start +
                                                real_batch_size]
         neg_sample_id = sample_id + len(self.train_data['Y'])
         feed_dict = {
             'train': train,
             'rank': 1,
             'Y': utils.numpy_to_torch(y),
             global_p.K_SAMPLE_ID:
             np.concatenate([sample_id, neg_sample_id])
         }
         for c in self.data_columns:
             d = np.concatenate([
                 data[c][batch_start:batch_start + real_batch_size],
                 neg_columns_dict[c]
             ])
             if c == global_p.C_HISTORY and self.sparse_his == 1:
                 x, y = [], []
                 for idx, iids in enumerate(d):
                     x.extend([idx] * len(iids))
                     y.extend(iids)
                 if len(x) <= 0:
                     i = torch.LongTensor([[0], [0]])
                     v = torch.FloatTensor([0.0])
                 else:
                     i = torch.LongTensor([x, y])
                     v = torch.FloatTensor([1.0] * len(x))
                 history = torch.sparse.FloatTensor(
                     i, v,
                     torch.Size(
                         [real_batch_size * 2, self.data_loader.item_num]))
                 if torch.cuda.device_count() > 0:
                     history = history.cuda()
                 feed_dict[c] = history
             else:
                 feed_dict[c] = utils.numpy_to_torch(d)
     return feed_dict
Example #28
File: HistoryDP.py  Project: ZSCDumin/NLR
    def get_feed_dict(self,
                      data,
                      batch_start,
                      batch_size,
                      train,
                      neg_data=None,
                      special_cols=None):
        """
        topn模型产生一个batch,如果是训练需要对每个正样本采样一个负样本,保证每个batch前一半是正样本,后一半是对应的负样本
        :param data: data dict,由self.get_*_data()和self.format_data_dict()系列函数产生
        :param batch_start: batch开始的index
        :param batch_size: batch大小
        :param train: 训练还是测试
        :param neg_data: 负例的data dict,如果已经有可以传入拿来用
        :param special_cols: 需要特殊处理的column
        :return: batch的feed dict
        """
        feed_dict = DataProcessor.get_feed_dict(
            self,
            data,
            batch_start,
            batch_size,
            train,
            neg_data=neg_data,
            special_cols=[C_HISTORY, C_HISTORY_NEG] if special_cols is None
            else [C_HISTORY, C_HISTORY_NEG] + special_cols)

        his_cs, his_ls = [C_HISTORY], [C_HISTORY_LENGTH]
        if C_HISTORY_NEG in feed_dict:  # if a negative-history column is present
            his_cs.append(C_HISTORY_NEG)
            his_ls.append(C_HISTORY_NEG_LENGTH)

        for i, c in enumerate(his_cs):
            lc, d = his_ls[i], feed_dict[c]
            if self.sparse_his == 1:  # if using the sparse representation
                x, y, v = [], [], []
                for idx, iids in enumerate(d):
                    x.extend([idx] * len(iids))
                    y.extend([abs(iid) for iid in iids])
                    v.extend([
                        1.0 if iid > 0 else -1.0 if iid < 0 else 0
                        for iid in iids
                    ])
                if len(x) <= 0:
                    i = utils.numpy_to_torch(np.array([[0], [0]]), gpu=False)
                    v = utils.numpy_to_torch(np.array([0.0], dtype=np.float32),
                                             gpu=False)
                else:
                    i = utils.numpy_to_torch(np.array([x, y]), gpu=False)
                    v = utils.numpy_to_torch(np.array(v, dtype=np.float32),
                                             gpu=False)
                history = torch.sparse.FloatTensor(
                    i, v, torch.Size([len(d), self.data_loader.item_num]))
                # if torch.cuda.device_count() > 0:
                #     history = history.cuda()
                feed_dict[c] = history
                feed_dict[lc] = [len(iids) for iids in d]
                # feed_dict[lc] = utils.numpy_to_torch(np.array([len(iids) for iids in d]), gpu=False)
            else:
                lengths = [len(iids) for iids in d]
                max_length = max(lengths)
                new_d = np.array([x + [0] * (max_length - len(x)) for x in d])
                feed_dict[c] = utils.numpy_to_torch(new_d, gpu=False)
                feed_dict[lc] = lengths
                # feed_dict[lc] = utils.numpy_to_torch(np.array(lengths), gpu=False)
        # print(feed_dict)
        # assert 1==2
        return feed_dict
Example #29
    def logic_regularizer(self, out_dict, train):
        check_list = out_dict[CHECK]
        constraint = out_dict['constraint']
        constraint_valid = out_dict['constraint_valid']
        false = self.logic_not(self.true)

        # regularizers
        # length
        r_length = constraint.norm(dim=2).sum()
        check_list.append(('r_length', r_length))

        # not
        r_not_true = self.similarity(false, self.true)
        r_not_true = r_not_true.sum()
        check_list.append(('r_not_true', r_not_true))
        r_not_self = self.similarity(self.logic_not(constraint), constraint)
        r_not_self = (r_not_self * constraint_valid).sum()
        check_list.append(('r_not_self', r_not_self))
        r_not_not_self = 1 - self.similarity(
            self.logic_not(self.logic_not(constraint)), constraint)
        r_not_not_self = (r_not_not_self * constraint_valid).sum()
        check_list.append(('r_not_not_self', r_not_not_self))

        # and
        r_and_true = 1 - self.similarity(
            self.logic_and(constraint, self.true, train=train), constraint)
        r_and_true = (r_and_true * constraint_valid).sum()
        check_list.append(('r_and_true', r_and_true))

        r_and_false = 1 - self.similarity(
            self.logic_and(constraint, false, train=train), false)
        r_and_false = (r_and_false * constraint_valid).sum()
        check_list.append(('r_and_false', r_and_false))

        r_and_self = 1 - self.similarity(
            self.logic_and(constraint, constraint, train=train), constraint)
        r_and_self = (r_and_self * constraint_valid).sum()
        check_list.append(('r_and_self', r_and_self))

        r_and_not_self = 1 - self.similarity(
            self.logic_and(constraint, self.logic_not(constraint),
                           train=train), false)
        r_and_not_self = (r_and_not_self * constraint_valid).sum()
        check_list.append(('r_and_not_self', r_and_not_self))

        # or
        r_or_true = 1 - self.similarity(
            self.logic_or(constraint, self.true, train=train), self.true)
        r_or_true = (r_or_true * constraint_valid).sum()
        check_list.append(('r_or_true', r_or_true))

        r_or_false = 1 - self.similarity(
            self.logic_or(constraint, false, train=train), constraint)
        r_or_false = (r_or_false * constraint_valid).sum()
        check_list.append(('r_or_false', r_or_false))

        r_or_self = 1 - self.similarity(
            self.logic_or(constraint, constraint, train=train), constraint)
        r_or_self = (r_or_self * constraint_valid).sum()
        check_list.append(('r_or_self', r_or_self))

        r_or_not_self = 1 - self.similarity(
            self.logic_or(constraint, self.logic_not(constraint), train=train),
            self.true)
        r_or_not_self = (r_or_not_self * constraint_valid).sum()
        check_list.append(('r_or_not_self', r_or_not_self))

        r_loss = 0
        r_loss += r_not_true + r_not_self + r_not_not_self + \
                  r_and_true + r_and_false + r_and_self + r_and_not_self + \
                  r_or_true + r_or_false + r_or_self + r_or_not_self

        if self.r_logic > 0:
            r_loss = r_loss * self.r_logic
        else:
            r_loss = utils.numpy_to_torch(np.array(0.0, dtype=np.float32))
        r_loss += r_length * self.r_length
        check_list.append(('r_loss', r_loss))

        out_dict['r_loss'] = r_loss
        return out_dict
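Each term above encodes a law of Boolean logic on the embedding space (¬T = F, x ∧ T = x, x ∨ ¬x = T, and so on), penalizing 1 minus the similarity between the two sides of each identity. self.similarity is not defined in this excerpt; for the regularizer to make sense it should return values in roughly [0, 1], and one plausible sketch (an assumption, not the project's code) is cosine similarity rescaled from [-1, 1]:

    import torch.nn.functional as F

    def similarity(vector1, vector2):
        # hypothetical sketch: cosine similarity mapped into [0, 1]
        return (F.cosine_similarity(vector1, vector2, dim=-1) + 1) / 2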
Example #30
File: CFKG.py  Project: youngzw/ReChorus
 def loss(self, predictions):
     batch_size = predictions.shape[0]
     pos_pred, neg_pred = predictions[:, :2].flatten(), predictions[:, 2:].flatten()
     loss = self.loss_function(pos_pred, neg_pred, utils.numpy_to_torch(np.ones(batch_size * 2)))
     return loss