Пример #1
0
    def run(self):
        print('/1')
        index, r_index = 0, 0
        dict_locid_catid,dict_catid_to_index,dict_r_catid_to_index= {},{},{}
        for ny_vcat_i in self.ny_vcat[1:]:
            ny_vcat_i = ny_vcat_i.split(',')
            locid_ = ny_vcat_i[0]
            catid_ = ny_vcat_i[1]
            if locid_ not in dict_locid_catid:
                dict_locid_catid[locid_] = (catid_, int(catid_))
                if catid_ not in dict_catid_to_index:
                    dict_catid_to_index[catid_] = index
                    index += 1
                if int(catid_) not in dict_r_catid_to_index:
                    dict_r_catid_to_index[int(catid_)] = r_index
                    r_index += 1
        set_locid_name = set(dict_locid_catid.keys())
        locid_name = list(set(dict_locid_catid.keys()))
        catid_set = list(set(dict_catid_to_index.values()))
        relation_catid_set = list(set(dict_r_catid_to_index.values()))
        print('/2')
        for i in range(len(locid_name)):
            locid_catid_ = dict_locid_catid[locid_name[i]]
            index_ = dict_catid_to_index[locid_catid_[0]]
            r_index_ = dict_r_catid_to_index[locid_catid_[1]]
            dict_locid_catid[locid_name[i]] = (index_, r_index_)
        print('/3')
        dict_uid_category,uid_category_set,gender_set,race_set = {},[],[],[]
        for ny_demo_i in self.ny_demo[1:]:
            ny_demo_i = ny_demo_i.split(',')
            uid_ = ny_demo_i[0]
            gender_ = int(ny_demo_i[1]) - 1
            race_ = int(ny_demo_i[3]) - 1
            category_ = gender_ * race_ + race_
            dict_uid_category[uid_] = (category_, (gender_, race_))
            uid_category_set.append(category_)
            gender_set.append(gender_)
            race_set.append(race_)
        uid_category_set = list(set(uid_category_set))
        uid_name = set(dict_uid_category.keys())
        self.gender_set = list(set(gender_set))
        self.race_set = list(set(race_set))
        print('/4')
        dict_uid_time_locid = {}
        for i in range(len(self.ny_checkin) - 1):
            ny_checkin_i = self.ny_checkin[i + 1].split(',')
            uid_, time_, locid_ = ny_checkin_i[1], ny_checkin_i[
                2], ny_checkin_i[5].strip('\n')
            time_day, time_hms = time_.split()[0], time_.split()[1]
            if uid_ in uid_name and locid_ in set_locid_name:
                year_ = int(time_day.split('-')[0])
                if year_ in self.dict_restrict['year']:
                    if uid_ not in dict_uid_time_locid:
                        dict_uid_time_locid[uid_] = [(time_day, time_hms,
                                                      locid_)]
                    else:
                        dict_uid_time_locid[uid_].append(
                            (time_day, time_hms, locid_))
        print('/5')
        uid_name = list(set(dict_uid_time_locid.keys()))
        dict_uid_locid_relation, dict_locid_to_entity, index = {}, {}, 0
        for u_i in range(len(uid_name)):
            uid_time_locid_ = dict_uid_time_locid[uid_name[u_i]]
            if len(uid_time_locid_) >= self.dict_restrict['num_checkin']:
                sorted_uid_time_locid_ = sorted(uid_time_locid_)
                for i in range(len(sorted_uid_time_locid_)):
                    time_day_, time_hms_, locid_ = sorted_uid_time_locid_[i][
                        0], sorted_uid_time_locid_[i][
                            1], sorted_uid_time_locid_[i][2]
                    daynum_time_day_ = int(time_day_.split('-')[1]) - 1
                    time_h = int(time_hms_.split('-')[0].split(':')[0])
                    if time_h > 11:
                        time_h = 1
                    else:
                        time_h = 0
                    relation_time = time_h * daynum_time_day_ + daynum_time_day_
                    sorted_uid_time_locid_[i] = (locid_, relation_time)
                    if locid_ not in dict_locid_to_entity:
                        dict_locid_to_entity[locid_] = index
                        index += 1
                dict_uid_locid_relation[uid_name[u_i]] = sorted_uid_time_locid_
        uid_name = list(set(dict_uid_locid_relation.keys()))
        locid_name = list(set(dict_locid_to_entity.keys()))
        print('/6')
        test_X_num = self.dict_restrict['test_X_num']
        test_Y_num = self.dict_restrict['test_Y_num']
        train_set,test_set,test_X_set,test_Y_set,relation_time_set,data_set = [],[],[],[],[],[]
        for i in range(len(uid_name)):
            uid_locid_relation_ = dict_uid_locid_relation[uid_name[i]]
            data_set_, catid_set_ = [], []
            for j in range(len(uid_locid_relation_)):
                locid_ = uid_locid_relation_[j][0]
                catid_ = dict_locid_catid[locid_][0]
                entity_locid_ = dict_locid_to_entity[locid_]
                relation_time_ = uid_locid_relation_[j][1]
                data_set_.append(entity_locid_)
                relation_time_set.append(relation_time_)
                catid_set_.append(catid_)
            data_set.append(data_set_)
            train_set_ = data_set_[:len(data_set_) - test_X_num - test_Y_num]
            test_X_set_ = data_set_[len(data_set_) - test_X_num -
                                    test_Y_num:len(data_set_) - test_Y_num]
            test_Y_set_ = data_set_[len(data_set_) - test_Y_num:]
            test_set_ = data_set_[len(data_set_) - test_X_num - test_Y_num:]
            train_set.append(train_set_)
            test_set.append(test_set_)
            test_X_set.append(test_X_set_)
            test_Y_set.append(test_Y_set_)
        self.test_X_set = np.array(test_X_set)
        relation_time_set = list(set(relation_time_set))
        entity_set = list(set(dict_locid_to_entity.values()))
        print('/7')
        dict_uid_category_to_entity = {}
        for i in range(len(uid_category_set)):
            dict_uid_category_to_entity[
                uid_category_set[i]] = uid_category_set[i] + len(entity_set)
        dict_catid_to_entity = {}
        for i in range(len(catid_set)):
            dict_catid_to_entity[catid_set[i]] = len(entity_set) + len(
                uid_category_set) + catid_set[i]
        dict_relation_catid_to_r = {}
        for i in range(len(relation_catid_set)):
            dict_relation_catid_to_r[relation_catid_set[i]] = len(
                relation_time_set) + relation_catid_set[i]
        print('/8')
        self.dict_KG, self.uid_attribute_category = {}, []
        for i in range(len(uid_name)):
            uid_category_, uid_category_detail_ = dict_uid_category[
                uid_name[i]]
            entity_uid_category_ = dict_uid_category_to_entity[uid_category_]
            uid_locid_relation_ = dict_uid_locid_relation[uid_name[i]]
            gender_, race_ = uid_category_detail_[0], uid_category_detail_[1]
            self.uid_attribute_category.append((gender_, race_))
            for j in range(len(uid_locid_relation_)):
                locid_ = uid_locid_relation_[j][0]
                relation_time_ = uid_locid_relation_[j][1]
                entity_locid_ = dict_locid_to_entity[locid_]
                catid_, relation_catid_ = dict_locid_catid[locid_][
                    0], dict_locid_catid[locid_][1]
                entity_catid_ = dict_catid_to_entity[catid_]
                relation_catid_ = dict_relation_catid_to_r[relation_catid_]
                if entity_locid_ not in self.dict_KG:
                    self.dict_KG[entity_locid_] = {}
                    for k in range(len(self.gender_set)):
                        self.dict_KG[entity_locid_]['gender' + '-' + str(
                            self.gender_set[k])] = []
                        if self.gender_set[k] == gender_:
                            self.dict_KG[entity_locid_]['gender' + '-' + str(
                                self.gender_set[k])].append([
                                    entity_locid_, relation_time_,
                                    entity_uid_category_
                                ])
                            self.dict_KG[entity_locid_]['gender' + '-' + str(
                                self.gender_set[k])].append([
                                    entity_locid_, relation_catid_,
                                    entity_catid_
                                ])
                    for k in range(len(self.race_set)):
                        self.dict_KG[entity_locid_]['race' + '-' + str(
                            self.race_set[k])] = []
                        if self.race_set[k] == race_:
                            self.dict_KG[entity_locid_]['race' + '-' + str(
                                self.race_set[k])].append([
                                    entity_locid_, relation_time_,
                                    entity_uid_category_
                                ])
                            self.dict_KG[entity_locid_]['race' + '-' + str(
                                self.race_set[k])].append([
                                    entity_locid_, relation_catid_,
                                    entity_catid_
                                ])
                else:
                    for k in range(len(self.gender_set)):
                        if self.gender_set[k] == gender_:
                            self.dict_KG[entity_locid_]['gender' + '-' + str(
                                self.gender_set[k])].append([
                                    entity_locid_, relation_time_,
                                    entity_uid_category_
                                ])
                            self.dict_KG[entity_locid_]['gender' + '-' + str(
                                self.gender_set[k])].append([
                                    entity_locid_, relation_catid_,
                                    entity_catid_
                                ])
                    for k in range(len(self.race_set)):
                        if self.race_set[k] == race_:
                            self.dict_KG[entity_locid_]['race' + '-' + str(
                                self.race_set[k])].append([
                                    entity_locid_, relation_time_,
                                    entity_uid_category_
                                ])
                            self.dict_KG[entity_locid_]['race' + '-' + str(
                                self.race_set[k])].append([
                                    entity_locid_, relation_catid_,
                                    entity_catid_
                                ])

        self.dict_itemid_upfdf = dict()
        for i in range(len(uid_name)):
            uid_upf_ = self.uid_attribute_category[i]
            uid_data_set_ = data_set[i]
            for j in range(len(uid_data_set_)):
                if uid_data_set_[j] not in self.dict_itemid_upfdf:
                    self.dict_itemid_upfdf[uid_data_set_[j]] = list()
                self.dict_itemid_upfdf[uid_data_set_[j]].append(uid_upf_)
        itemid_name = list(set(self.dict_itemid_upfdf.keys()))
        for i in range(len(itemid_name)):
            itemid_upfdf_ = self.dict_itemid_upfdf[itemid_name[i]]
            b_0, b_1, b_2, g_0, g_1, g_2 = 0, 0, 0, 0, 0, 0
            for j in range(len(itemid_upfdf_)):
                if itemid_upfdf_[j] == (0, 0):
                    b_0 += 1
                elif itemid_upfdf_[j] == (0, 1):
                    b_1 += 1
                elif itemid_upfdf_[j] == (0, 2):
                    b_2 += 1
                elif itemid_upfdf_[j] == (1, 0):
                    g_0 += 1
                elif itemid_upfdf_[j] == (1, 1):
                    g_1 += 1
                elif itemid_upfdf_[j] == (1, 2):
                    g_2 += 1
                else:
                    print('Error!!')
            b_0_p, b_1_p, b_2_p = b_0 / len(itemid_upfdf_), b_1 / len(
                itemid_upfdf_), b_2 / len(itemid_upfdf_)
            g_0_p, g_1_p, g_2_p = g_0 / len(itemid_upfdf_), g_1 / len(
                itemid_upfdf_), g_2 / len(itemid_upfdf_)
            self.dict_itemid_upfdf[itemid_name[i]] = [
                b_0_p, b_1_p, b_2_p, g_0_p, g_1_p, g_2_p
            ]

        num_user = len(uid_name)
        num_item = len(entity_set)
        n_entities = len(entity_set) + len(uid_category_set) + len(catid_set)
        n_relation = len(relation_time_set) + len(relation_catid_set)

        train = Interactions(train_set, num_user, num_item)
        train.to_sequence(self.L_hgn, self.T_hgn)

        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids
        train_matrix = train.tocsr()

        param_ = [self.args, num_user, num_item, n_entities, n_relation]
        data_ = [users_np, sequences_np, targets_np, train_matrix]
        test_ = [
            train, self.test_X_set, test_Y_set, self.uid_attribute_category
        ]

        train_data, test_data = list(), list()
        entity_set = set(entity_set)
        for i in range(len(train_set)):
            uid_ = i
            neg_entity_set_ = list(entity_set - set(train_set[i]))
            neg_train_set_ = random.sample(neg_entity_set_, len(train_set[i]))
            for j in range(len(train_set[i])):
                train_data.append([uid_, train_set[i][j], 1])
                train_data.append([uid_, neg_train_set_[j], 0])
        for i in range(len(test_set)):
            uid_ = i
            neg_entity_set_ = list(entity_set - set(test_set[i]))
            neg_test_set_ = random.sample(neg_entity_set_, len(test_set[i]))
            for j in range(len(test_set[i])):
                test_data.append([uid_, test_set[i][j], 1])
                test_data.append([uid_, neg_test_set_[j], 0])
        train_data = np.array(train_data)
        eval_data = train_data
        test_data = np.array(test_data)

        user_history_dict = dict()
        for i in range(len(data_set)):
            uid_ = i
            user_history_dict[uid_] = data_set[i]

        pickle_data = {
            'train_data': train_data,
            'eval_data': eval_data,
            'test_data': test_data,
            'n_entity': n_entities,
            'n_relation': n_relation,
            'kg_np': self.KG_random_generator(),
            'user_history_dict': user_history_dict,
        }
        return param_, data_, test_
Пример #2
0
    model_parser.add_argument('--nv', type=int, default=4)
    model_parser.add_argument('--nh', type=int, default=16)
    model_parser.add_argument('--drop', type=float, default=0.5)
    model_parser.add_argument('--ac_conv', type=str, default='relu')
    model_parser.add_argument('--ac_fc', type=str, default='relu')

    model_config = model_parser.parse_args()
    model_config.L = config.L

    # set seed
    set_seed(config.seed,
             cuda=config.use_cuda)

    # load dataset
    train = Interactions(config.train_root)
    train.tocsr()
    
    # transform triplets to sequence representation
    train.to_sequence(config.L, config.T)

    test = Interactions(config.test_root,
                        user_map=train.user_map,
                        item_map=train.item_map)
    test.tocsr()

    print(config)
    print(model_config)
    # fit model
    model = Recommender(n_iter=config.n_iter,
                        batch_size=config.batch_size,
                        learning_rate=config.learning_rate,