Пример #1
0
    def __init__(self, dim, maxlen, indexes):
        self.dim = dim
        self.maxlen = maxlen

        self.item_index = indexes[0]
        self.city_index = indexes[1]
        self.action_index = indexes[2]

        hash_flag = True
        iFeature = SingleFeat('item', len(self.item_index) + 1, hash_flag)
        cFeature = SingleFeat('city', len(self.city_index) + 1, hash_flag)
        pFeature = SingleFeat('position', 25 + 1, hash_flag)
        aFeature = SingleFeat('action', len(self.action_index) + 1, hash_flag)

        self.feature_dim_dict = {"sparse": [iFeature, cFeature, pFeature, aFeature],
                                 "dense": [SingleFeat('price', False)]}

        self.behavior_feature_list = ["item", "city", "position", "action"]

        self.model = DSIN(self.feature_dim_dict, self.behavior_feature_list, sess_max_count=1, sess_len_max=self.maxlen,
                          embedding_size=self.dim,
                          att_head_num=1,
                          att_embedding_size=self.dim * len(self.behavior_feature_list),
                          dnn_hidden_units=[self.dim, self.dim, self.dim ], dnn_dropout=0.5)

        self.model.compile('adam', 'binary_crossentropy',
                           metrics=['acc'])
    def __init__(self, uNum, iNum, dim, maxlen):
        self.uNum = uNum
        self.iNum = iNum
        self.dim = dim
        self.maxlen = maxlen

        hash_flag = True
        self.feature_columns = [SparseFeat('user', self.uNum, hash_flag),
                                SparseFeat('item', self.iNum, hash_flag),
                                VarLenSparseFeat('sess_0_item', self.iNum, self.dim, use_hash=hash_flag,
                                                 embedding_name='item')]

        self.behavior_feature_list = ["item"]

        self.model = DSIN(self.feature_columns, self.behavior_feature_list, sess_max_count=1,
                          embedding_size=self.dim,
                          att_head_num=self.dim,
                          dnn_hidden_units=[self.dim, self.dim, self.dim], dnn_dropout=0.5)

        self.model.compile('adam', 'binary_crossentropy',
                           metrics=['acc'])
Пример #3
0
    sess1_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [0, 0, 0, 0]])
    sess1_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [0, 0, 0, 0]])

    sess2_iid = np.array([[1, 2, 3, 0], [0, 0, 0, 0], [0, 0, 0, 0]])
    sess2_igender = np.array([[1, 1, 2, 0], [0, 0, 0, 0], [0, 0, 0, 0]])

    sess_number = np.array([2, 1, 0])

    feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender,
                    'sess_0_item': sess1_iid, 'sess_0_item_gender': sess1_igender, 'score': score,
                    'sess_1_item': sess2_iid, 'sess_1_item_gender': sess2_igender, }

    x = {name: feature_dict[name] for name in get_feature_names(feature_columns)}
    x["sess_length"] = sess_number
    y = [1, 0, 1]
    return x, y, feature_columns, behavior_feature_list


if __name__ == "__main__":
    if tf.__version__ >= '2.0.0':
        tf.compat.v1.disable_eager_execution()

    x, y, feature_columns, behavior_feature_list = get_xy_fd(True)

    model = DSIN(feature_columns, behavior_feature_list, sess_max_count=2,
                 dnn_hidden_units=[4, 4, 4], dnn_dropout=0.5, )

    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
Пример #4
0
class AttentionModel():
    def __init__(self, dim, maxlen, indexes):
        self.dim = dim
        self.maxlen = maxlen

        self.item_index = indexes[0]
        self.city_index = indexes[1]
        self.action_index = indexes[2]

        hash_flag = True
        iFeature = SingleFeat('item', len(self.item_index) + 1, hash_flag)
        cFeature = SingleFeat('city', len(self.city_index) + 1, hash_flag)
        pFeature = SingleFeat('position', 25 + 1, hash_flag)
        aFeature = SingleFeat('action', len(self.action_index) + 1, hash_flag)

        self.feature_dim_dict = {"sparse": [iFeature, cFeature, pFeature, aFeature],
                                 "dense": [SingleFeat('price', False)]}

        self.behavior_feature_list = ["item", "city", "position", "action"]

        self.model = DSIN(self.feature_dim_dict, self.behavior_feature_list, sess_max_count=1, sess_len_max=self.maxlen,
                          embedding_size=self.dim,
                          att_head_num=1,
                          att_embedding_size=self.dim * len(self.behavior_feature_list),
                          dnn_hidden_units=[self.dim, self.dim, self.dim ], dnn_dropout=0.5)

        self.model.compile('adam', 'binary_crossentropy',
                           metrics=['acc'])

    def get_features(self, item, impressions, prices, row):

        action = self.action_index[row['action_type']]
        city = self.city_index[row['city']]
        position = impressions.index(item) + 1 if item in impressions else 0
        # print(position)
        price = prices[position - 1] if position != 0 else 0

        return action, city, position, price

    def generate_data(self, df, mode="train"):

        sessions, itemIds, cand_items, cand_actions, cand_cities, cand_positions, cand_prices, seq_items, seq_cities, seq_actions, seq_prices, seq_positions, labels = [], [], [], [], [], [], [], [], [], [], [], [], []
        for idx, rows in df.groupby("session_id"):
            seq_item, seq_city, seq_action, seq_price, seq_position = [], [], [], [], []
            lastRow = rows.iloc[-1]

            if lastRow["action_type"] != "clickout item":
                continue

            if mode == "val":
                if type(lastRow['reference']) == float:
                    continue
            elif mode == "test":
                if type(lastRow['reference']) != float:
                    continue

            histRows = rows.iloc[:-1]

            impressions = [self.item_index[int(i)] for i in lastRow['impressions'].split("|")]
            prices = [int(i) for i in lastRow['prices'].split("|")]
            if mode == "train":
                gtItem = self.item_index[int(lastRow['reference'])]
                action, city, position, price = self.get_features(gtItem, impressions, prices, lastRow)

            if len(histRows) > 0:
                for _i, _r in histRows.iterrows():
                    _item = self.item_index[int(_r['reference'])]

                    _action, _city, _position, _price = self.get_features(_item, impressions, prices, _r)

                    seq_item.append(_item)
                    seq_position.append(_position)
                    seq_city.append(_city)
                    seq_action.append(_action)
                    seq_price.append(_price)

            if mode == "train":

                seq_items.append(seq_item)
                seq_positions.append(seq_position)
                seq_cities.append(seq_city)
                seq_actions.append(seq_action)
                seq_prices.append(seq_price)
                labels.append(1)

                # sample negative instance from impressions
                pool = impressions if len(impressions) > 1 else np.arange(len(self.item_index)).tolist()
                sample = negative_sample(pool, gtItem)
                action, city, position, price = self.get_features(sample, impressions, prices, lastRow)

                cand_items.append(sample)
                cand_actions.append(action)
                cand_cities.append(city)
                cand_positions.append(position)
                cand_prices.append(price)
                seq_items.append(seq_item)
                seq_positions.append(seq_position)
                seq_cities.append(seq_city)
                seq_actions.append(seq_action)
                seq_prices.append(seq_price)
                labels.append(0)

            else:
                _action = self.action_index[lastRow['action_type']]
                _city = self.city_index[lastRow['city']]
                if mode == "val":
                    gtItem = self.item_index[int(lastRow['reference'])]

                for _position, (_item, _price) in enumerate(zip(impressions, prices)):
                    cand_items.append(_item)
                    cand_actions.append(_action)
                    cand_cities.append(_city)
                    cand_positions.append(_position+1)
                    cand_prices.append(_price)

                    seq_items.append(seq_item)
                    seq_positions.append(seq_position)
                    seq_cities.append(seq_city)
                    seq_actions.append(seq_action)
                    seq_prices.append(seq_price)

                    if mode == "val":
                        labels.append(1 if _item == gtItem else 0)

                sessions.extend([lastRow['session_id']]*len(impressions))
                itemIds.extend([i for i in lastRow['impressions'].split("|")])


        cand_items = np.array(cand_items)
        cand_positions = np.array(cand_positions)
        cand_cities = np.array(cand_cities)
        cand_actions = np.array(cand_actions)
        cand_prices = np.array(cand_prices)
        seq_items = pad_sequences(seq_items, maxlen=self.maxlen)
        seq_positions = pad_sequences(seq_positions, maxlen=self.maxlen)
        seq_cities = pad_sequences(seq_cities, maxlen=self.maxlen)
        seq_actions = pad_sequences(seq_actions, maxlen=self.maxlen)
        seq_prices = pad_sequences(seq_prices, maxlen=self.maxlen)
        labels = np.array(labels)


        feature_dict = {'item': cand_items, 'position': cand_positions, 'city': cand_cities, 'action': cand_actions,
                        'price': cand_prices,
                        'seq_item': seq_items, 'seq_position': seq_positions, 'seq_city': seq_cities,
                        'seq_action': seq_actions, 'price': cand_prices, 'seq_price': seq_prices}

        x = [feature_dict[feat.name] for feat in self.feature_dim_dict["sparse"]] + [feature_dict[feat.name] for feat in
                                                                                     self.feature_dim_dict["dense"]] + [
                feature_dict['seq_' + feat] for feat in self.behavior_feature_list]

        x += [np.arange(len(cand_items))]

        y = labels

        # for i in [cand_items, cand_positions, cand_cities, cand_actions, cand_prices, seq_items, seq_positions, seq_cities, seq_actions, seq_prices, labels]:
        #     print(i.shape)

        if mode == "train":
            return x, y
        elif mode == "val":
            return sessions, x, y
        else:
            return sessions, itemIds, x
class DeepSessionInterestNetwork(Recommender):
    def __init__(self, uNum, iNum, dim, maxlen):
        self.uNum = uNum
        self.iNum = iNum
        self.dim = dim
        self.maxlen = maxlen

        hash_flag = True
        self.feature_columns = [SparseFeat('user', self.uNum, hash_flag),
                                SparseFeat('item', self.iNum, hash_flag),
                                VarLenSparseFeat('sess_0_item', self.iNum, self.dim, use_hash=hash_flag,
                                                 embedding_name='item')]

        self.behavior_feature_list = ["item"]

        self.model = DSIN(self.feature_columns, self.behavior_feature_list, sess_max_count=1,
                          embedding_size=self.dim,
                          att_head_num=self.dim,
                          dnn_hidden_units=[self.dim, self.dim, self.dim], dnn_dropout=0.5)

        self.model.compile('adam', 'binary_crossentropy',
                           metrics=['acc'])

    def init(self, trainSeq):
        self.trainSeq = trainSeq

    def load_pre_train(self, pre):
        super().load_pre_train(pre)

    def get_params(self):
        super().get_params()

    def train(self, x_train, y_train, batch_size):
        history = self.model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
        hist = self.model.fit(x_train, y_train, batch_size=batch_size, epochs=1, verbose=0)
        loss = hist.history['loss'][0]

        return loss

    def get_train_instances(self, train):
        users, checkins, cand_venues, labels = [], [], [], []

        for u in self.trainSeq:
            visited = self.trainSeq[u]
            checkin_ = []
            for v in visited[:-1]:
                checkin_.append(v)
                checkins.extend(sequence.pad_sequences([checkin_[:]], maxlen=self.maxVenue))

            # start from the second venue in user's checkin sequence.
            visited = visited[1:]
            for i in range(len(visited)):
                cand_venues.append(visited[i])
                users.append(u)
                labels.append(1)
                j = np.random.randint(self.uNum)
                # check if j is in training dataset or in user's sequence at state i or not
                while (u, j) in train or j in visited[:i]:
                    j = np.random.randint(self.uNum)

                cand_venues.append(j)
                users.append(u)
                labels.append(0)

        sess_number = np.ones(len(labels))

        users = np.array(users)
        items = np.array(cand_venues)
        sess_item = np.array(checkins)
        labels = np.array(labels)

        feature_dict = {'user': users, 'item': items, 'score': labels, 'sess_0_item': sess_item}

        fixlen_feature_names = get_fixlen_feature_names(self.feature_columns)
        varlen_feature_names = get_varlen_feature_names(self.feature_columns)
        x = [feature_dict[name] for name in fixlen_feature_names] + [feature_dict[name] for name in
                                                                     varlen_feature_names]
        x += [sess_number]

        return x, labels

    def rank(self, users, items):
        super().rank(users, items)

    def save(self, path):
        super().save(path)