Пример #1
0
    sess1_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [0, 0, 0, 0]])
    sess1_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [0, 0, 0, 0]])

    sess2_iid = np.array([[1, 2, 3, 0], [0, 0, 0, 0], [0, 0, 0, 0]])
    sess2_igender = np.array([[1, 1, 2, 0], [0, 0, 0, 0], [0, 0, 0, 0]])

    sess_number = np.array([2, 1, 0])

    feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender,
                    'sess_0_item': sess1_iid, 'sess_0_item_gender': sess1_igender, 'score': score,
                    'sess_1_item': sess2_iid, 'sess_1_item_gender': sess2_igender, }

    x = {name: feature_dict[name] for name in get_feature_names(feature_columns)}
    x["sess_length"] = sess_number
    y = [1, 0, 1]
    return x, y, feature_columns, behavior_feature_list


if __name__ == "__main__":
    if tf.__version__ >= '2.0.0':
        tf.compat.v1.disable_eager_execution()

    x, y, feature_columns, behavior_feature_list = get_xy_fd(True)

    model = DSIN(feature_columns, behavior_feature_list, sess_max_count=2,
                 dnn_hidden_units=[4, 4, 4], dnn_dropout=0.5, )

    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
class DeepSessionInterestNetwork(Recommender):
    def __init__(self, uNum, iNum, dim, maxlen):
        self.uNum = uNum
        self.iNum = iNum
        self.dim = dim
        self.maxlen = maxlen

        hash_flag = True
        self.feature_columns = [SparseFeat('user', self.uNum, hash_flag),
                                SparseFeat('item', self.iNum, hash_flag),
                                VarLenSparseFeat('sess_0_item', self.iNum, self.dim, use_hash=hash_flag,
                                                 embedding_name='item')]

        self.behavior_feature_list = ["item"]

        self.model = DSIN(self.feature_columns, self.behavior_feature_list, sess_max_count=1,
                          embedding_size=self.dim,
                          att_head_num=self.dim,
                          dnn_hidden_units=[self.dim, self.dim, self.dim], dnn_dropout=0.5)

        self.model.compile('adam', 'binary_crossentropy',
                           metrics=['acc'])

    def init(self, trainSeq):
        self.trainSeq = trainSeq

    def load_pre_train(self, pre):
        super().load_pre_train(pre)

    def get_params(self):
        super().get_params()

    def train(self, x_train, y_train, batch_size):
        history = self.model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
        hist = self.model.fit(x_train, y_train, batch_size=batch_size, epochs=1, verbose=0)
        loss = hist.history['loss'][0]

        return loss

    def get_train_instances(self, train):
        users, checkins, cand_venues, labels = [], [], [], []

        for u in self.trainSeq:
            visited = self.trainSeq[u]
            checkin_ = []
            for v in visited[:-1]:
                checkin_.append(v)
                checkins.extend(sequence.pad_sequences([checkin_[:]], maxlen=self.maxVenue))

            # start from the second venue in user's checkin sequence.
            visited = visited[1:]
            for i in range(len(visited)):
                cand_venues.append(visited[i])
                users.append(u)
                labels.append(1)
                j = np.random.randint(self.uNum)
                # check if j is in training dataset or in user's sequence at state i or not
                while (u, j) in train or j in visited[:i]:
                    j = np.random.randint(self.uNum)

                cand_venues.append(j)
                users.append(u)
                labels.append(0)

        sess_number = np.ones(len(labels))

        users = np.array(users)
        items = np.array(cand_venues)
        sess_item = np.array(checkins)
        labels = np.array(labels)

        feature_dict = {'user': users, 'item': items, 'score': labels, 'sess_0_item': sess_item}

        fixlen_feature_names = get_fixlen_feature_names(self.feature_columns)
        varlen_feature_names = get_varlen_feature_names(self.feature_columns)
        x = [feature_dict[name] for name in fixlen_feature_names] + [feature_dict[name] for name in
                                                                     varlen_feature_names]
        x += [sess_number]

        return x, labels

    def rank(self, users, items):
        super().rank(users, items)

    def save(self, path):
        super().save(path)