def cold_sampling(vectors, cs_ratio):
    """
    Randomly replace whole vectors by small Gaussian noise.

    One Bernoulli(cs_ratio) draw is made for every index over all but the last
    dimension of ``vectors``. Where the draw is 1 the complete vector along the
    last dimension is replaced by noise sampled from N(0, 0.01); where it is 0
    the original vector is kept unchanged.

    :param vectors: ? * v
    :param cs_ratio: 0 < cs_ratio < 1
    :return: tensor of the same shape as ``vectors`` (? * v)
    """
    # Per-vector replacement probability, with a trailing axis for broadcasting.
    cs_p = torch.empty(vectors.size()[:-1]).fill_(cs_ratio).unsqueeze(dim=-1)  # ? * 1
    # utils.tensor_to_gpu is a project helper; presumably it moves the tensor to
    # the GPU when one is in use -- TODO confirm.
    drop_pos = utils.tensor_to_gpu(torch.bernoulli(cs_p))  # ? * 1
    random_vectors = utils.tensor_to_gpu(torch.empty(vectors.size()).normal_(0, 0.01))  # ? * v
    # drop_pos == 1 -> noise, drop_pos == 0 -> original vector.
    cs_vectors = random_vectors * drop_pos + vectors * (1 - drop_pos)  # ? * v
    return cs_vectors
 def uniform_size(self, vector1, vector2, train):
     """
     Broadcast two tensors to a common shape and, when training, randomly swap them.

     The tensor with fewer dimensions is expanded to the shape of the other one.
     If ``train`` is truthy, a random 0/1 mask with one entry per vector (all
     dimensions except the last) is drawn; where the mask is 1 the pair keeps
     its order, where it is 0 the two vectors are exchanged.

     :param vector1: first tensor, ? * v
     :param vector2: second tensor, expandable to / from the shape of vector1
     :param train: whether to apply the random swap
     :return: tuple (vector1, vector2) with identical shapes
     """
     if len(vector1.size()) < len(vector2.size()):
         vector1 = vector1.expand_as(vector2)
     else:
         vector2 = vector2.expand_as(vector1)
     if train:
         # Uniform probabilities fed into bernoulli() give a fair 0/1 mask.
         r12 = torch.empty(vector1.size()[:-1]).uniform_(0, 1).bernoulli()
         # utils.tensor_to_gpu is a project helper; presumably it moves the mask
         # to the same device as the inputs -- TODO confirm.
         r12 = utils.tensor_to_gpu(r12).unsqueeze(-1)
         new_v1 = r12 * vector1 + (-r12 + 1) * vector2
         new_v2 = r12 * vector2 + (-r12 + 1) * vector1
         return new_v1, new_v2
     return vector1, vector2
    def predict(self, feed_dict):
        """
        Score each sample from the summed history-item embeddings plus bias terms.

        The user side is the sum of ``self.iid_embeddings_his`` vectors of the
        history items, divided by the square root of the number of history
        items. During training the target item is excluded from the history.
        The score is the dot product of that vector with the target item's
        ``self.iid_embeddings`` vector plus user, item and global bias.

        :param feed_dict: batch dictionary; reads TOTAL_BATCH_SIZE,
            REAL_BATCH_SIZE, UID, IID, C_HISTORY, TRAIN and, when the history
            is a sparse tensor, C_HISTORY_LENGTH
        :return: dict with PREDICTION (flattened scores), CHECK (list, left
            empty here) and EMBEDDING_L2 (the user and item bias tensors)
        """
        check_list, embedding_l2 = [], []

        total_batch_size = feed_dict[TOTAL_BATCH_SIZE]
        real_batch_size = feed_dict[REAL_BATCH_SIZE]
        u_ids = feed_dict[UID]
        i_ids = feed_dict[IID]
        cf_i_vectors = self.iid_embeddings(i_ids)

        history = feed_dict[C_HISTORY]
        his_i_vectors = self.iid_embeddings_his(i_ids)
        if 'sparse' in str(history.type()):
            # Sparse history: one matrix product sums the embeddings of all
            # history items of each row.
            all_his_vector = history.mm(self.iid_embeddings_his.weight)
            if feed_dict[TRAIN]:
                # remove item i from history vectors
                if real_batch_size != total_batch_size:
                    # Only the first real_batch_size rows get their target item
                    # subtracted; the remaining rows subtract zeros (presumably
                    # they are sampled negatives -- TODO confirm).
                    padding_zeros = torch.zeros(size=[
                        total_batch_size - real_batch_size, self.ui_vector_size
                    ])
                    padding_zeros = utils.tensor_to_gpu(padding_zeros)
                    tmp_his_i_vectors = torch.cat(
                        [his_i_vectors[:real_batch_size], padding_zeros])
                else:
                    tmp_his_i_vectors = his_i_vectors
                his_vector = all_his_vector - tmp_his_i_vectors
                his_length = feed_dict[C_HISTORY_LENGTH] - 1
            else:
                his_vector = all_his_vector
                his_length = feed_dict[C_HISTORY_LENGTH]
        else:
            # Dense history of item ids; ids that are not > 0 count as padding.
            valid_his = history.gt(0).long()  # Batch * His
            if feed_dict[TRAIN]:
                # Mask out positions that hold the target item itself.
                if_target_item = (history != i_ids.view([-1, 1])).long()
                valid_his = if_target_item * valid_his
            his_length = valid_his.sum(dim=1, keepdim=True)
            # Masked positions look up index 0 and are zeroed right after.
            his_vectors = self.iid_embeddings_his(history *
                                                  valid_his)  # Batch * His * v
            valid_his = valid_his.view([total_batch_size, -1,
                                        1]).float()  # Batch * His * 1
            his_vectors = his_vectors * valid_his  # Batch * His * v
            his_vector = his_vectors.sum(dim=1)

        # normalize alpha = 0.5
        # Rows with an empty history divide by 1 instead of 0.
        valid_his = his_length.gt(0).float()
        tmp_length = his_length.float() * valid_his + (1 - valid_his) * 1
        his_vector = his_vector / tmp_length.sqrt().view([-1, 1])

        # bias
        u_bias = self.user_bias(u_ids).view([-1])
        i_bias = self.item_bias(i_ids).view([-1])
        embedding_l2.extend([u_bias, i_bias])

        # cf_u_vectors = self.uid_embeddings(u_ids)
        # cf_i_vectors = self.iid_embeddings(i_ids)
        prediction = (his_vector * cf_i_vectors).sum(dim=1).view([-1])
        prediction = prediction + u_bias + i_bias + self.global_bias
        # prediction = prediction + self.global_bias
        # check_list.append(('prediction', prediction))

        out_dict = {
            PREDICTION: prediction,
            CHECK: check_list,
            EMBEDDING_L2: embedding_l2
        }
        return out_dict
    def predict(self, feed_dict):
        """
        Score each sample with a GMF + MLP model whose user vector is derived
        from the interaction history through a tree-structured soft hash.

        Steps, as implemented below:
        1. Build a history vector from ``self.gmf_iid_embeddings`` (plain sum
           for a sparse history, attention-weighted sum for a dense one).
        2. Feed the detached history vector through the ``u_hash_%d`` layers and
           ``self.u_hash_predict`` to obtain hierarchical softmax weights over
           ``self.hash_u_num`` hashed users.
        3. Pick hashed users (top-k when not training, uniformly sampled ids
           when training) and average their ``self.uid_embeddings`` by weight.
        4. Combine that vector with the history vector through an attention
           layer, then apply the GMF and MLP towers and ``self.prediction``.

        NOTE(review): the copy of this method under review had truncated
        statements (missing ``else:`` branches and cut-off call arguments).
        They were restored from the nesting of the surviving lines and from the
        parallel hash/attention code that appears later in this file; confirm
        against the upstream source.

        :param feed_dict: batch dictionary; reads UID, IID, TOTAL_BATCH_SIZE,
            REAL_BATCH_SIZE, C_HISTORY, TRAIN, DROPOUT, K_ANCHOR_USER and, for
            a sparse history, C_HISTORY_LENGTH
        :return: dict with PREDICTION (flattened scores), CHECK (list, left
            empty here) and EMBEDDING_L2 (the GMF and MLP item embeddings)
        """
        check_list, embedding_l2 = [], []
        u_ids = feed_dict[UID]
        i_ids = feed_dict[IID]

        gmf_i_vectors = self.gmf_iid_embeddings(i_ids.view([-1, 1]))
        mlp_i_vectors = self.mlp_iid_embeddings(i_ids)
        embedding_l2.extend([gmf_i_vectors, mlp_i_vectors])

        total_batch_size = feed_dict[TOTAL_BATCH_SIZE]
        real_batch_size = feed_dict[REAL_BATCH_SIZE]

        # # history to hash_uid
        history = feed_dict[C_HISTORY]
        his_i_vectors = self.gmf_iid_embeddings(i_ids)
        if 'sparse' in str(history.type()):
            # Sparse history: one matrix product sums the history embeddings.
            all_his_vector = history.mm(self.gmf_iid_embeddings.weight)
            if feed_dict[TRAIN]:
                # remove item i from history vectors
                if real_batch_size != total_batch_size:
                    # Rows beyond real_batch_size subtract zeros (presumably
                    # they are sampled negatives -- TODO confirm).
                    padding_zeros = torch.zeros(size=[
                        total_batch_size - real_batch_size, self.ui_vector_size
                    ])
                    padding_zeros = utils.tensor_to_gpu(padding_zeros)
                    tmp_his_i_vectors = torch.cat(
                        [his_i_vectors[:real_batch_size], padding_zeros])
                else:
                    tmp_his_i_vectors = his_i_vectors
                his_vector = all_his_vector - tmp_his_i_vectors
                his_length = feed_dict[C_HISTORY_LENGTH] - 1
            else:
                his_vector = all_his_vector
                his_length = feed_dict[C_HISTORY_LENGTH]
            # normalize alpha = 0.5
            # Rows with an empty history divide by 1 instead of 0.
            valid_his = his_length.gt(0).float()
            tmp_length = his_length.float() * valid_his + (1 - valid_his) * 1
            his_vector = his_vector / tmp_length.sqrt().view([-1, 1])
        else:
            # Dense history of item ids; ids that are not > 0 count as padding.
            valid_his = history.gt(0).long()  # Batch * His
            if feed_dict[TRAIN]:
                # Mask out positions that hold the target item itself.
                if_target_item = (history != i_ids.view([-1, 1])).long()
                valid_his = if_target_item * valid_his
            his_length = valid_his.sum(dim=1, keepdim=True)
            his_vectors = self.gmf_iid_embeddings(history *
                                                  valid_his)  # Batch * His * v
            valid_his = valid_his.view([total_batch_size, -1,
                                        1]).float()  # Batch * His * 1
            his_vectors = his_vectors * valid_his  # Batch * His * v
            # Attention of every history item towards the target item,
            # normalised over the valid positions only.
            his_att = (his_vectors * gmf_i_vectors).sum(
                dim=-1, keepdim=True).exp() * valid_his  # Batch * His * 1
            his_att_sum = his_att.sum(dim=1, keepdim=True)  # Batch * 1 * 1
            his_att_weight = his_att / (his_att_sum + 1e-8)
            all_his_vector = (his_vectors * his_att_weight).sum(
                dim=1)  # Batch * 64
            his_vector = all_his_vector
            # normalize alpha = 0.5
            # The attention weights already sum to 1, so the vector is scaled
            # up by sqrt(length) here rather than divided.
            his_vector = his_vector * his_length.float().sqrt().view([-1, 1])

        # The hash network works on a detached copy, so no gradient flows from
        # it back into the history embeddings.
        hash_layer = his_vector.detach()
        for i, layer_size in enumerate(self.hash_layers):
            hash_layer = getattr(self, 'u_hash_%d' % i)(hash_layer)
            hash_layer = F.relu(hash_layer)
            hash_layer = torch.nn.Dropout(p=feed_dict[DROPOUT])(hash_layer)

        # # tree hash
        u_tree_weights = self.u_hash_predict(hash_layer)
        u_tree_weights = u_tree_weights.clamp(min=-10)
        # Consecutive column slices of u_tree_weights hold the logits of one
        # tree level each; the last level has hash_u_num entries.
        tree_layers = [0] + self.tree_layers + [self.hash_u_num]
        tree_layers_weights, lo, hi = [], 0, 0
        for i in range(len(tree_layers) - 1):
            lo, hi = lo + tree_layers[i], hi + tree_layers[i + 1]
            tree_layers_weights.append(u_tree_weights[:, lo:hi])
        u_hash_weights = tree_layers_weights[0].softmax(dim=-1)
        for i, weights in enumerate(tree_layers_weights[1:]):
            # Softmax among the children of each parent node, multiplied by the
            # probability of reaching that parent.
            weights = weights.view([total_batch_size, tree_layers[i + 1],
                                    -1]).softmax(dim=-1)
            u_hash_weights = (
                weights * u_hash_weights.view([total_batch_size, -1, 1])).view(
                    [total_batch_size, -1])

        # check_list.append(('u_hash_weights_min', u_hash_weights.min(dim=1)[0].view([-1])))
        # check_list.append(('u_hash_weights_max', u_hash_weights.max(dim=1)[0].view([-1])))

        # # # get max prob hash id
        # u_max_prob_weights, u_max_prob_ids = u_hash_weights.topk(k=self.hash_u_num, dim=1, sorted=True)
        if not feed_dict[TRAIN]:
            # Evaluation: keep the most probable hashed users.
            sample_max_n = min(self.sample_max_n, self.hash_u_num)
            u_max_prob_weights, u_max_prob_ids = u_hash_weights.topk(
                k=sample_max_n, dim=1, sorted=True)
        else:
            # Training: draw hashed user ids uniformly at random and read
            # their weights.
            sample_r_n = min(self.sample_r_n, self.hash_u_num)
            sample_uids = torch.randint(0,
                                        self.hash_u_num,
                                        size=[real_batch_size,
                                              sample_r_n]).long()
            sample_uids = utils.tensor_to_gpu(sample_uids)
            if real_batch_size != total_batch_size:
                # Repeat the same samples for every block of real_batch_size
                # rows; assumes total_batch_size is a multiple of
                # real_batch_size -- TODO confirm.
                sample_uids = torch.cat(
                    [sample_uids] * int(total_batch_size / real_batch_size))
            u_max_prob_weights, u_max_prob_ids = u_hash_weights.gather(
                1, sample_uids), sample_uids

        # Renormalise the selected weights so they sum to (almost) 1.
        u_max_prob_weights = u_max_prob_weights / (
            u_max_prob_weights.sum(dim=-1, keepdim=True) + 1e-8)

        u_max_prob_vectors = self.uid_embeddings(u_max_prob_ids)
        u_max_prob_vectors = u_max_prob_vectors * u_max_prob_weights.unsqueeze(
            dim=2)
        u_max_prob_vectors = u_max_prob_vectors.sum(dim=1, keepdim=True)

        # NOTE(review): hash_anchor_vectors is computed but not used further in
        # this method.
        anchor_uids = feed_dict[K_ANCHOR_USER].view([-1, 1])
        if_anchor_uids = anchor_uids.gt(0).long()
        anchor_uids = anchor_uids * if_anchor_uids
        if_anchor_uids = if_anchor_uids.view([-1, 1, 1]).float()
        anchor_vectors = self.uid_embeddings(anchor_uids) * if_anchor_uids
        hash_anchor_vectors = anchor_vectors * if_anchor_uids + u_max_prob_vectors * (
            1 - if_anchor_uids)

        # Stack the hashed-user vector and the history vector: Batch * 2 * v.
        u_transfer_vectors = torch.cat(
            (u_max_prob_vectors, his_vector.view_as(u_max_prob_vectors)),
            dim=1)
        if feed_dict[TRAIN] and 1 > self.cs_ratio > 0:
            # Cold sampling: each of the two vectors is independently replaced
            # by N(0, 0.01) noise with probability cs_ratio.
            drop_pos = torch.empty(size=(feed_dict[TOTAL_BATCH_SIZE], 2,
                                         1)).bernoulli_(p=self.cs_ratio)
            random_vectors = torch.empty(
                size=u_transfer_vectors.size()).normal_(mean=0, std=0.01)
            drop_pos = utils.tensor_to_gpu(drop_pos)
            random_vectors = utils.tensor_to_gpu(random_vectors)
            u_transfer_vectors = u_transfer_vectors * (
                1 - drop_pos) + drop_pos * random_vectors
        # Attention over the two stacked vectors.
        u_transfer_att = self.transfer_att_pre(
            F.relu(self.transfer_att_layer(u_transfer_vectors))).softmax(dim=1)
        u_transfer_vectors = (u_transfer_vectors * u_transfer_att).sum(dim=1)
        # check_list.append(('u_transfer_vectors', u_transfer_vectors))

        gmf_i_vectors = gmf_i_vectors.view([-1, self.ui_vector_size])
        gmf = u_transfer_vectors * gmf_i_vectors

        mlp = torch.cat((u_transfer_vectors, mlp_i_vectors), dim=1)
        for layer in self.mlp:
            mlp = layer(mlp)
            mlp = F.relu(mlp)
            mlp = torch.nn.Dropout(p=feed_dict[DROPOUT])(mlp)

        output = torch.cat((gmf, mlp), dim=1)
        for layer in self.p_layer:
            output = layer(output)
            output = F.relu(output)
            output = torch.nn.Dropout(p=feed_dict[DROPOUT])(output)

        prediction = self.prediction(output).view([-1])
        # check_list.append(('prediction', prediction))

        out_dict = {
            PREDICTION: prediction,
            CHECK: check_list,
            EMBEDDING_L2: embedding_l2
        }
        return out_dict
    def predict_and_or(self, feed_dict):
        """
        Score the target item by evaluating a logic expression over the history.

        History ids are embedded with ``self.feature_embeddings``; a negative
        id is turned into ``self.logic_not`` of its embedding and id 0 is
        masked out. The history literals are folded into a single vector with
        ``self.logic_and``, that vector is negated, combined with the target
        item's embedding through ``self.logic_or`` and compared with
        ``self.true`` by ``self.similarity``.

        NOTE(review): this copy of the method does not parse. Several lines
        appear to be missing (``else:`` branches, the ``constraint.append(``
        openers in front of the dangling ``.view(...))`` arguments, and the
        tails of two wrapped calls). The spots are marked below; restore them
        from the upstream source before running.

        :param feed_dict: batch dictionary; reads TRAIN, TOTAL_BATCH_SIZE,
            REAL_BATCH_SIZE, C_HISTORY, C_HISTORY_LENGTH, IID, RANK and Y
        :return: dict with PREDICTION, CHECK (prediction, label and
            ``self.true``), 'constraint' and 'constraint_valid' (each
            concatenated along dim 1) and EMBEDDING_L2 (an empty list here)
        """
        check_list, embedding_l2 = [], []
        train = feed_dict[TRAIN]
        seq_rec = self.seq_rec == 1
        total_batch_size = feed_dict[TOTAL_BATCH_SIZE]  # = B
        real_batch_size = feed_dict[REAL_BATCH_SIZE]  # = rB

        history = feed_dict[C_HISTORY]  # B * H
        history_length = feed_dict[C_HISTORY_LENGTH]  # B

        # The sign of an id selects the literal: id >= 0 keeps the embedding,
        # id < 0 uses logic_not(embedding); id == 0 is masked as invalid.
        his_pos_neg = history.ge(0).float().unsqueeze(-1)  # B * H * 1
        his_valid = history.abs().gt(0).float()  # B * H

        elements = self.feature_embeddings(history.abs())  # B * H * V
        not_elements = self.logic_not(elements)  # B * H * V
        elements = his_pos_neg * elements + (-his_pos_neg +
                                             1) * not_elements  # B * H * V
        elements = elements * his_valid.unsqueeze(-1)  # B * H * V

        # Vectors gathered in `constraint`, with a matching validity mask in
        # `constraint_valid`, are returned to the caller.
        constraint = [
            elements.view([total_batch_size, -1, self.v_vector_size])
        ]  # B * H * V
        constraint_valid = [his_valid.view([total_batch_size, -1])]  # B * H

        # NOTE(review): `train & ~seq_rec` (used below) applies bitwise NOT to
        # seq_rec; if seq_rec is a plain Python bool this yields -1 / -2 and the
        # `&` result is 1 only when train is truthy and seq_rec is False --
        # confirm the types of train and self.seq_rec.
        if self.seq_rec == 0:
            # combine the history elements in randomly shuffled order
            all_as, all_avs = [], []
            for i in range(max(history_length)):
                all_as.append(elements[:, i, :])  # B * V
                all_avs.append(his_valid[:, i].unsqueeze(-1))  # B * 1
            while len(all_as) > 1:
                idx_a, idx_b = 0, 1
                if train:
                    # NOTE(review): the remaining arguments of this call are
                    # missing; two distinct indices are needed for the pops
                    # below.
                    idx_a, idx_b = np.random.choice(len(all_as),
                # The larger index is popped first so the smaller one stays
                # valid. NOTE(review): an `else:` seems to be missing between
                # the two pairs of pops.
                if idx_a > idx_b:
                    a, av = all_as.pop(idx_a), all_avs.pop(
                        idx_a)  # B * V,  B * 1
                    b, bv = all_as.pop(idx_b), all_avs.pop(
                        idx_b)  # B * V,  B * 1
                    b, bv = all_as.pop(idx_b), all_avs.pop(
                        idx_b)  # B * V,  B * 1
                    a, av = all_as.pop(idx_a), all_avs.pop(
                        idx_a)  # B * V,  B * 1
                a_and_b = self.logic_and(a, b, train=train & ~seq_rec)  # B * V
                # Use the AND result only where both operands are valid;
                # otherwise pass through whichever operand is valid.
                abv = av * bv  # B * 1
                ab = abv * a_and_b + av * (-bv + 1) * a + (-av +
                                                           1) * bv * b  # B * V
                all_as.insert(0, ab)
                all_avs.insert(0, (av + bv).gt(0).float())
                # NOTE(review): the next line looks like the argument of a
                # missing `constraint.append(`; a matching
                # `constraint_valid.append(...)` is probably missing as well.
                    ab.view([total_batch_size, 1, self.v_vector_size]))
            and_vector = all_as[0]
            left_valid = all_avs[0]
            # NOTE(review): an `else:` for `if self.seq_rec == 0` seems to be
            # missing before the sequential variant below.
            # # combine the history elements in sequential order
            tmp_a = None
            for i in range(max(history_length)):
                tmp_a_valid = his_valid[:, i].unsqueeze(-1)  # B * 1
                # NOTE(review): an `else:` seems to be missing after the first
                # assignment in this `if`.
                if tmp_a is None:
                    tmp_a = elements[:, i, :] * tmp_a_valid  # B * V
                    tmp_a = self.logic_and(tmp_a, elements[:, i, :], train=train & ~seq_rec) * tmp_a_valid + \
                            tmp_a * (-tmp_a_valid + 1)  # B * V
                    # NOTE(review): dangling argument of a missing
                    # `constraint.append(`.
                        tmp_a.view([total_batch_size, 1,
                                    self.v_vector_size]))  # B * 1 * V
                    constraint_valid.append(tmp_a_valid)  # B * 1
            and_vector = tmp_a  # B * V
            left_valid = his_valid[:, 0].unsqueeze(-1)  # B * 1

        all_valid = utils.tensor_to_gpu(torch.ones([total_batch_size,
                                                    1]))  # B * 1

        # Negate the conjunction of the history.
        left_vector = self.logic_not(and_vector)  # B * V
        # NOTE(review): dangling argument of a missing `constraint.append(`.
            left_vector.view([total_batch_size, 1,
                              self.v_vector_size]))  # B * 1 * V
        constraint_valid.append(left_valid)  # B * 1

        right_vector = self.feature_embeddings(feed_dict[IID])  # B * V
        # NOTE(review): dangling argument of a missing `constraint.append(`.
            right_vector.view([total_batch_size, 1,
                               self.v_vector_size]))  # B * 1 * V
        constraint_valid.append(all_valid)  # B * 1

        # Where the history side is invalid the expression reduces to the
        # target item vector alone.
        sent_vector = self.logic_or(left_vector, right_vector, train=train & ~seq_rec) * left_valid \
                      + (-left_valid + 1) * right_vector  # B * V
        # sent_vector = self.logic_or(left_vector, right_vector, train=train & ~seq_rec)  # B * V
        # NOTE(review): dangling argument of a missing `constraint.append(`.
            sent_vector.view([total_batch_size, 1,
                              self.v_vector_size]))  # B * 1 * V
        constraint_valid.append(left_valid)  # B * 1

        # NOTE(review): the tail of the first similarity call and the `else:`
        # are missing. The second form rescales a sigmoid similarity into
        # [label_min, label_max].
        if feed_dict[RANK] == 1:
            prediction = self.similarity(sent_vector, self.true,
            prediction = self.similarity(sent_vector, self.true, sigmoid=True) * \
                         (self.label_max - self.label_min) + self.label_min

        check_list.append(('prediction', prediction))
        check_list.append(('label', feed_dict[Y]))
        check_list.append(('true', self.true))

        constraint = torch.cat(tuple(constraint), dim=1)
        constraint_valid = torch.cat(tuple(constraint_valid), dim=1)
        # NOTE(review): the closing `}` of this dict literal is missing.
        out_dict = {
            PREDICTION: prediction,
            CHECK: check_list,
            'constraint': constraint,
            'constraint_valid': constraint_valid,
            EMBEDDING_L2: embedding_l2
        return out_dict
    def predict(self, feed_dict):
        """
        Score each sample by attentively mixing a collaborative-filtering (CF)
        part and a content-based (CB) part.

        CF side: a history vector is built from ``self.iid_embeddings``, hashed
        through a tree-structured softmax onto ``self.hash_u_num`` hashed users,
        and merged with the history vector by attention into the user vector.
        CB side: user and item feature embeddings are passed through the
        ``user_layer_%d`` / ``item_layer_%d`` stacks. A shared attention layer
        then weights CF against CB separately for the user and the item, and
        the final score is the dot product of the mixed vectors plus bias.

        NOTE(review): the copy of this method under review was missing its
        ``else:`` branches and the end of the ``torch.zeros(`` call; they were
        restored from the nesting of the surviving lines -- confirm against
        the upstream source.

        :param feed_dict: batch dictionary; reads UID, IID, TOTAL_BATCH_SIZE,
            REAL_BATCH_SIZE, C_HISTORY, TRAIN, DROPOUT, K_ANCHOR_USER, X and,
            for a sparse history, C_HISTORY_LENGTH
        :return: dict with PREDICTION, 'cb_prediction', 'cf_prediction', CHECK
            (list, left empty here) and EMBEDDING_L2
        """
        check_list, embedding_l2 = [], []
        u_ids = feed_dict[UID]
        i_ids = feed_dict[IID]
        i_bias = self.item_bias(i_ids).view([-1])
        cf_i_vectors = self.iid_embeddings(i_ids.view([-1, 1]))
        embedding_l2.extend([cf_i_vectors, i_bias])

        total_batch_size = feed_dict[TOTAL_BATCH_SIZE]
        real_batch_size = feed_dict[REAL_BATCH_SIZE]

        # # history to hash_uid
        history = feed_dict[C_HISTORY]
        his_i_vectors = self.iid_embeddings(i_ids)
        if 'sparse' in str(history.type()):
            # Sparse history: one matrix product sums the history embeddings.
            all_his_vector = history.mm(self.iid_embeddings.weight)
            if feed_dict[TRAIN]:
                # remove item i from history vectors
                if real_batch_size != total_batch_size:
                    # Rows beyond real_batch_size subtract zeros (presumably
                    # they are sampled negatives -- TODO confirm).
                    padding_zeros = torch.zeros(size=[total_batch_size - real_batch_size, self.ui_vector_size])
                    padding_zeros = utils.tensor_to_gpu(padding_zeros)
                    tmp_his_i_vectors = torch.cat([his_i_vectors[:real_batch_size], padding_zeros])
                else:
                    tmp_his_i_vectors = his_i_vectors
                his_vector = all_his_vector - tmp_his_i_vectors
                his_length = feed_dict[C_HISTORY_LENGTH] - 1
            else:
                his_vector = all_his_vector
                his_length = feed_dict[C_HISTORY_LENGTH]
            # normalize alpha = 0.5
            # Rows with an empty history divide by 1 instead of 0.
            valid_his = his_length.gt(0).float()
            tmp_length = his_length.float() * valid_his + (1 - valid_his) * 1
            his_vector = his_vector / tmp_length.sqrt().view([-1, 1])
        else:
            # Dense history of item ids; ids that are not > 0 count as padding.
            valid_his = history.gt(0).long()  # Batch * His
            if feed_dict[TRAIN]:
                # Mask out positions that hold the target item itself.
                if_target_item = (history != i_ids.view([-1, 1])).long()
                valid_his = if_target_item * valid_his
            his_length = valid_his.sum(dim=1, keepdim=True)
            his_vectors = self.iid_embeddings(history * valid_his)  # Batch * His * v
            valid_his = valid_his.view([total_batch_size, -1, 1]).float()  # Batch * His * 1
            his_vectors = his_vectors * valid_his  # Batch * His * v
            # Attention of every history item towards the target item,
            # normalised over the valid positions only.
            his_att = (his_vectors * cf_i_vectors).sum(dim=-1, keepdim=True).exp() * valid_his  # Batch * His * 1
            his_att_sum = his_att.sum(dim=1, keepdim=True)  # Batch * 1 * 1
            his_att_weight = his_att / (his_att_sum + 1e-8)
            all_his_vector = (his_vectors * his_att_weight).sum(dim=1)  # Batch * 64
            his_vector = all_his_vector
            # normalize alpha = 0.5
            # The attention weights already sum to 1, so the vector is scaled
            # up by sqrt(length) here rather than divided.
            his_vector = his_vector * his_length.float().sqrt().view([-1, 1])

        # The hash network works on a detached copy, so no gradient flows from
        # it back into the history embeddings.
        hash_layer = his_vector.detach()
        for i, layer_size in enumerate(self.hash_layers):
            hash_layer = getattr(self, 'u_hash_%d' % i)(hash_layer)
            hash_layer = F.relu(hash_layer)
            hash_layer = torch.nn.Dropout(p=feed_dict[DROPOUT])(hash_layer)

        # # tree hash
        u_tree_weights = self.u_hash_predict(hash_layer)
        u_tree_weights = u_tree_weights.clamp(min=-10)
        # Consecutive column slices of u_tree_weights hold the logits of one
        # tree level each; the last level has hash_u_num entries.
        tree_layers = [0] + self.tree_layers + [self.hash_u_num]
        tree_layers_weights, lo, hi = [], 0, 0
        for i in range(len(tree_layers) - 1):
            lo, hi = lo + tree_layers[i], hi + tree_layers[i + 1]
            tree_layers_weights.append(u_tree_weights[:, lo:hi])
        u_hash_weights = tree_layers_weights[0].softmax(dim=-1)
        for i, weights in enumerate(tree_layers_weights[1:]):
            # Softmax among the children of each parent node, multiplied by the
            # probability of reaching that parent.
            weights = weights.view([total_batch_size, tree_layers[i + 1], -1]).softmax(dim=-1)
            u_hash_weights = (weights * u_hash_weights.view([total_batch_size, -1, 1])).view([total_batch_size, -1])

        # check_list.append(('u_hash_weights_min', u_hash_weights.min(dim=1)[0].view([-1])))
        # check_list.append(('u_hash_weights_max', u_hash_weights.max(dim=1)[0].view([-1])))

        # # # get max prob hash id
        # u_max_prob_weights, u_max_prob_ids = u_hash_weights.topk(k=self.hash_u_num, dim=1, sorted=True)
        if not feed_dict[TRAIN]:
            # Evaluation: keep the most probable hashed users.
            sample_max_n = min(self.sample_max_n, self.hash_u_num)
            u_max_prob_weights, u_max_prob_ids = u_hash_weights.topk(k=sample_max_n, dim=1, sorted=True)
        else:
            # Training: draw hashed user ids uniformly at random and read
            # their weights.
            sample_r_n = min(self.sample_r_n, self.hash_u_num)
            sample_uids = torch.randint(0, self.hash_u_num, size=[real_batch_size, sample_r_n]).long()
            sample_uids = utils.tensor_to_gpu(sample_uids)
            if real_batch_size != total_batch_size:
                # Repeat the same samples for every block of real_batch_size
                # rows; assumes total_batch_size is a multiple of
                # real_batch_size -- TODO confirm.
                sample_uids = torch.cat([sample_uids] * int(total_batch_size / real_batch_size))
            u_max_prob_weights, u_max_prob_ids = u_hash_weights.gather(1, sample_uids), sample_uids

        # Renormalise the selected weights so they sum to (almost) 1.
        u_max_prob_weights = u_max_prob_weights / (u_max_prob_weights.sum(dim=-1, keepdim=True) + 1e-8)

        u_max_prob_vectors = self.uid_embeddings(u_max_prob_ids)
        u_max_prob_vectors = u_max_prob_vectors * u_max_prob_weights.unsqueeze(dim=2)
        u_max_prob_vectors = u_max_prob_vectors.sum(dim=1, keepdim=True)

        # NOTE(review): hash_anchor_vectors is computed but not used further in
        # this method.
        anchor_uids = feed_dict[K_ANCHOR_USER].view([-1, 1])
        if_anchor_uids = anchor_uids.gt(0).long()
        anchor_uids = anchor_uids * if_anchor_uids
        if_anchor_uids = if_anchor_uids.view([-1, 1, 1]).float()
        anchor_vectors = self.uid_embeddings(anchor_uids) * if_anchor_uids
        hash_anchor_vectors = anchor_vectors * if_anchor_uids + u_max_prob_vectors * (1 - if_anchor_uids)

        # Stack the hashed-user vector and the history vector: Batch * 2 * v.
        u_transfer_vectors = torch.cat((u_max_prob_vectors, his_vector.view_as(u_max_prob_vectors)), dim=1)
        if feed_dict[TRAIN] and 1 > self.cs_ratio > 0:
            # Cold sampling: each of the two vectors is independently replaced
            # by N(0, 0.01) noise with probability cs_ratio.
            drop_pos = torch.empty(size=(feed_dict[TOTAL_BATCH_SIZE], 2, 1)).bernoulli_(p=self.cs_ratio)
            random_vectors = torch.empty(size=u_transfer_vectors.size()).normal_(mean=0, std=0.01)
            drop_pos = utils.tensor_to_gpu(drop_pos)
            random_vectors = utils.tensor_to_gpu(random_vectors)
            u_transfer_vectors = u_transfer_vectors * (1 - drop_pos) + drop_pos * random_vectors
        # Attention over the two stacked vectors.
        u_transfer_att = self.transfer_att_pre(F.relu(self.transfer_att_layer(u_transfer_vectors))).softmax(dim=1)
        u_transfer_vectors = (u_transfer_vectors * u_transfer_att).sum(dim=1)
        # check_list.append(('u_transfer_vectors', u_transfer_vectors))

        cf_i_vectors = cf_i_vectors.view([-1, self.ui_vector_size])
        # cold sampling
        if feed_dict[TRAIN] and 1 > self.cs_ratio > 0:
            # Column 0 decides whether the user vector is dropped, column 1
            # whether the item vector (and the item bias) is dropped.
            drop_ui_pos = utils.numpy_to_torch(
                np.random.choice(np.array([0, 1], dtype=np.float32), size=(feed_dict[TOTAL_BATCH_SIZE], 2),
                                 p=[1 - self.cs_ratio, self.cs_ratio]))
            # check_list.append(('drop_ui_pos', drop_ui_pos))
            drop_u_pos, drop_i_pos = drop_ui_pos[:, 0], drop_ui_pos[:, 1]
            drop_u_pos_v, drop_i_pos_v = drop_u_pos.view([-1, 1]), drop_i_pos.view([-1, 1])
            random_u_vectors = utils.numpy_to_torch(
                np.random.normal(0, 0.01, u_transfer_vectors.size()).astype(np.float32))
            random_i_vectors = utils.numpy_to_torch(np.random.normal(0, 0.01, cf_i_vectors.size()).astype(np.float32))
            i_bias = i_bias * (1 - drop_i_pos)
            u_transfer_vectors = random_u_vectors * drop_u_pos_v + u_transfer_vectors * (1 - drop_u_pos_v)
            cf_i_vectors = random_i_vectors * drop_i_pos_v + cf_i_vectors * (1 - drop_i_pos_v)

        # cf
        bias = i_bias + self.global_bias
        cf_prediction = (u_transfer_vectors * cf_i_vectors).sum(dim=-1).view([-1]) + bias

        # cb
        # The first user_feature_num columns of X are user features, the rest
        # are item features.
        u_fs = feed_dict[X][:, :self.user_feature_num]
        i_fs = feed_dict[X][:, self.user_feature_num:]
        uf_layer = self.feature_embeddings(u_fs).view(-1, self.f_vector_size * self.user_feature_num)
        if_layer = self.feature_embeddings(i_fs).view(-1, self.f_vector_size * self.item_feature_num)
        embedding_l2.extend([uf_layer, if_layer])

        # One extra iteration beyond the hidden layers acts as the output
        # layer: it gets no ReLU and no dropout.
        for i in range(0, len(self.cb_hidden_layers) + 1):
            uf_layer = getattr(self, 'user_layer_%d' % i)(uf_layer)
            if_layer = getattr(self, 'item_layer_%d' % i)(if_layer)
            uf_layer = getattr(self, 'user_bn_%d' % i)(uf_layer)
            if_layer = getattr(self, 'item_bn_%d' % i)(if_layer)
            if i < len(self.cb_hidden_layers):
                uf_layer = F.relu(uf_layer)
                uf_layer = torch.nn.Dropout(p=feed_dict[DROPOUT])(uf_layer)
                if_layer = F.relu(if_layer)
                if_layer = torch.nn.Dropout(p=feed_dict[DROPOUT])(if_layer)
        cb_u_vectors, cb_i_vectors = uf_layer, if_layer
        cb_prediction = (cb_u_vectors * if_layer).sum(dim=1).view([-1]) + bias

        # attention
        # User side: two-way softmax between the CF and the CB user vector.
        ah_cf_u = self.attention_layer(u_transfer_vectors)
        ah_cf_u = torch.tanh(ah_cf_u)
        a_cf_u = self.attention_prediction(ah_cf_u)
        a_cf_u = torch.exp(a_cf_u)

        ah_cb_u = self.attention_layer(cb_u_vectors)
        ah_cb_u = torch.tanh(ah_cb_u)
        a_cb_u = self.attention_prediction(ah_cb_u)
        a_cb_u = torch.exp(a_cb_u)

        a_sum = a_cf_u + a_cb_u

        a_cf_u = a_cf_u / a_sum
        a_cb_u = a_cb_u / a_sum

        # Item side: the same attention layers weigh CF against CB.
        ah_cf_i = self.attention_layer(cf_i_vectors)
        ah_cf_i = torch.tanh(ah_cf_i)
        a_cf_i = self.attention_prediction(ah_cf_i)
        a_cf_i = torch.exp(a_cf_i)

        ah_cb_i = self.attention_layer(cb_i_vectors)
        ah_cb_i = torch.tanh(ah_cb_i)
        a_cb_i = self.attention_prediction(ah_cb_i)
        a_cb_i = torch.exp(a_cb_i)

        a_sum = a_cf_i + a_cb_i
        a_cf_i = a_cf_i / a_sum
        a_cb_i = a_cb_i / a_sum

        u_vector = a_cf_u * u_transfer_vectors + a_cb_u * cb_u_vectors
        i_vector = a_cf_i * cf_i_vectors + a_cb_i * cb_i_vectors
        prediction = (u_vector * i_vector).sum(dim=1).view([-1]) + bias
        # check_list.append(('prediction', prediction))

        # cf_loss = torch.nn.MSELoss()(cf_prediction, feed_dict['Y'])
        # cb_loss = torch.nn.MSELoss()(cb_prediction, feed_dict['Y'])
        # loss = torch.nn.MSELoss()(prediction, feed_dict['Y']) + cf_loss + cb_loss
        out_dict = {PREDICTION: prediction,
                    'cb_prediction': cb_prediction, 'cf_prediction': cf_prediction,
                    CHECK: check_list, EMBEDDING_L2: embedding_l2}
        return out_dict