示例#1
0
def get_test_input(news_r_test, news_index_test):
    """Assemble evaluation arrays from the MINDsmall dev behaviors file.

    Args:
        news_r_test: Indexable collection of news vectors, addressed by the
            positions stored in ``news_index_test`` (presumably 300-wide to
            match the buffer allocated here -- confirm against the encoder).
        news_index_test: Mapping from a news id to its position in
            ``news_r_test``.

    Returns:
        A tuple ``(user_browsed_news_test, all_candidate_news_test,
        all_label_test, impression_index)``. The first item is a float32
        array of shape ``(num_samples, MAX_BROWSED, 300)`` whose unused
        history slots remain zero; the candidates and labels are converted
        with ``np.array``; ``impression_index`` is passed through untouched
        from ``preprocess_test_user_data``.
    """
    (impression_index, browsed_per_sample, candidates,
     labels) = preprocess_test_user_data('../../data/MINDsmall_dev/behaviors.tsv')

    browsed_vecs = np.zeros((len(browsed_per_sample), MAX_BROWSED, 300),
                            dtype='float32')
    for row, history in enumerate(browsed_per_sample):
        # Pairing with range() caps each history at MAX_BROWSED entries.
        for slot, news_id in zip(range(MAX_BROWSED), history):
            browsed_vecs[row, slot] = news_r_test[news_index_test[news_id]]

    # Only the first element of every candidate entry is looked up.
    candidate_vecs = np.array(
        [news_r_test[news_index_test[cand[0]]] for cand in candidates])
    label_array = np.array(labels)
    return browsed_vecs, candidate_vecs, label_array, impression_index
示例#2
0
def get_test_input(news_r_test, news_index_test):
    """Assemble evaluation arrays from the MINDsmall dev behaviors file.

    Args:
        news_r_test: Indexable collection of news vectors, addressed by the
            positions stored in ``news_index_test``.
        news_index_test: Mapping from a news id to its position in
            ``news_r_test``.

    Returns:
        A tuple ``(user_browsed_news_test, all_candidate_news_test,
        all_label_test, impression_index)``. The first item is a float32
        array of shape ``(num_samples, Config.max_browsed,
        Config.num_filters * len(Config.window_size))`` whose unused history
        slots remain zero; the candidates and labels are converted with
        ``np.array``; ``impression_index`` is passed through untouched from
        ``preprocess_test_user_data``.
    """
    impression_index, histories, candidates, labels = preprocess_test_user_data(
        '../../data/MINDsmall_dev/behaviors.tsv')

    buffer_shape = (
        len(histories),
        Config.max_browsed,
        Config.num_filters * len(Config.window_size),
    )
    browsed_vecs = np.zeros(buffer_shape, dtype='float32')
    for row, history in enumerate(histories):
        for slot, news_id in enumerate(history):
            # Entries beyond the configured history length are ignored.
            if slot >= Config.max_browsed:
                break
            browsed_vecs[row, slot] = news_r_test[news_index_test[news_id]]

    # Only the first element of every candidate entry is looked up.
    candidate_vecs = np.array(
        [news_r_test[news_index_test[cand[0]]] for cand in candidates])
    label_array = np.array(labels)
    return browsed_vecs, candidate_vecs, label_array, impression_index
示例#3
0
def get_test_input(news_index_test, news_r_test):
    """Assemble evaluation arrays from the MINDsmall dev behaviors file.

    Args:
        news_index_test: Mapping from a news id to its position in
            ``news_r_test``.
        news_r_test: Indexable collection of news vectors, addressed by the
            positions stored in ``news_index_test``.

    Returns:
        A tuple ``(impression_index, user_index, user_browsed_title_test,
        all_user_test, all_candidate_title_test, all_label_test)``.
        ``user_browsed_title_test`` is a float32 array of shape
        ``(num_histories, config.max_browsed, config.embedding_dim)`` whose
        unused slots remain zero; candidates and labels are converted with
        ``np.array``; the remaining items are passed through untouched from
        ``preprocess_test_user_data``.
    """
    (impression_index, user_index, histories, all_user_test, candidates,
     labels) = preprocess_test_user_data(
         '../../data/MINDsmall_dev/behaviors.tsv')
    print('preprocessing testing input...')

    buffer_shape = (len(histories), config.max_browsed, config.embedding_dim)
    browsed_titles = np.zeros(buffer_shape, dtype='float32')
    for row, history in enumerate(histories):
        for slot, news_id in enumerate(history):
            # Entries beyond the configured history length are ignored.
            if slot >= config.max_browsed:
                break
            browsed_titles[row, slot] = news_r_test[news_index_test[news_id]]

    # Only the first element of every candidate entry is looked up.
    candidate_titles = np.array(
        [news_r_test[news_index_test[cand[0]]] for cand in candidates])
    label_array = np.array(labels)
    return (impression_index, user_index, browsed_titles, all_user_test,
            candidate_titles, label_array)
示例#4
0
def get_test_input(news_index_test, news_r_test):
    """Assemble evaluation arrays from the MINDsmall dev behaviors file.

    Args:
        news_index_test: Mapping from a news id to its position in
            ``news_r_test``.
        news_r_test: Indexable collection of news vectors, addressed by the
            positions stored in ``news_index_test`` (presumably 256-wide to
            match the buffer allocated here -- confirm against the encoder).

    Returns:
        A tuple ``(impression_index, user_index, user_browsed_title_test,
        all_user_test, all_candidate_title_test, all_label_test)``.
        ``user_browsed_title_test`` is a float32 array of shape
        ``(num_histories, MAX_BROWSED, 256)`` whose unused slots remain zero;
        candidates and labels are converted with ``np.array``; the remaining
        items are passed through untouched from
        ``preprocess_test_user_data``.
    """
    (impression_index, user_index, histories, all_user_test, candidates,
     labels) = preprocess_test_user_data(
         '../../data/MINDsmall_dev/behaviors.tsv')
    print('preprocessing testing input...')

    browsed_titles = np.zeros((len(histories), MAX_BROWSED, 256),
                              dtype='float32')
    for row, history in enumerate(histories):
        # Pairing with range() caps each history at MAX_BROWSED entries.
        for slot, news_id in zip(range(MAX_BROWSED), history):
            browsed_titles[row, slot] = news_r_test[news_index_test[news_id]]

    # Only the first element of every candidate entry is looked up.
    candidate_titles = np.array(
        [news_r_test[news_index_test[cand[0]]] for cand in candidates])
    label_array = np.array(labels)
    return (impression_index, user_index, browsed_titles, all_user_test,
            candidate_titles, label_array)
示例#5
0
    user_id_test = np.array(all_user_test)
    all_candidate_news_test = np.array(
        [news_r_test[news_index_test[i[0]]] for i in all_candidate_test])
    all_label_test = np.array(all_label_test)
    return user_browsed_news_test, user_id_test, all_candidate_news_test, all_label_test, impression_index, user_index, all_user_test


# Script entry point: preprocess the news and behavior data, build and compile
# the model, then start assembling the training inputs.
if __name__ == "__main__":
    # Preprocess the train and dev news files together; the call returns the
    # vocabularies/maps and per-news features unpacked below.
    word_index, category_map, subcategory_map, news_category, news_subcategory, news_title, news_index, news_index_test, all_news_test = preprocess_news_data(
        '../../data/MINDsmall_train/news.tsv',
        '../../data/MINDsmall_dev/news.tsv')

    print('Preprocessing trainning input...')
    all_browsed, all_candidate, all_label, all_user, user_index = get_train_input(
        news_category, news_subcategory, news_title, news_index)
    # NOTE: user_index from the training step is passed in and then rebound to
    # the value returned here, so build_model below sees the returned one.
    impression_index, user_index, user_browsed_test, all_user_test, all_candidate_test, all_label_test, user_index_test = preprocess_test_user_data(
        '../../data/MINDsmall_dev/behaviors.tsv', user_index)

    # build_model returns four objects; only `model` is compiled here.
    news_encoder, user_encoder, model, model_test = build_model(
        word_index, category_map, subcategory_map, user_index, TYPE)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])

    # Disabled: render architecture diagrams of the four models to PNG files.
    # from tensorflow.keras.utils import plot_model
    # plot_model(model, to_file='model.png', show_shapes=True)
    # plot_model(model_test, to_file='model_test.png', show_shapes=True)
    # plot_model(news_encoder, to_file='news_encoder.png', show_shapes=True)
    # plot_model(user_encoder, to_file='user_encoder.png', show_shapes=True)

    # Training inputs are collected in a dict keyed by name (presumably the
    # model's input names -- confirm against build_model).
    train_data = {}
    train_data['browsed'] = np.array(all_browsed)
示例#6
0
    print('ndcg5: ', ndcg5)
    print('ndcg10: ', ndcg10)


# Script entry point: load cached news artifacts, build the train/test inputs,
# then construct and train the LSTUR model.
if __name__ == "__main__":
    # Load the cached .npy artifacts. The three loads followed by .item()
    # unwrap a single stored Python object (presumably a dict -- confirm
    # against the script that saved them).
    # NOTE(review): allow_pickle=True unpickles file contents; only load files
    # from a trusted source.
    news_index = np.load('news/news_index.npy', allow_pickle=True).item()
    news_index_test = np.load('news/news_index_test.npy',
                              allow_pickle=True).item()
    word_index = np.load('news/word_index.npy', allow_pickle=True).item()
    news_title = np.load('news/news_title.npy', allow_pickle=True)
    news_category = np.load('news/news_category.npy', allow_pickle=True)
    news_subcategory = np.load('news/news_subcategory.npy', allow_pickle=True)
    all_news_test = np.load('news/all_news_test.npy', allow_pickle=True)

    all_browsed, all_candidate, all_label, all_user, user_index, all_browsed_len = get_train_input(
        news_category, news_subcategory, news_title, news_index)
    # NOTE: user_index from the training step is passed in and then rebound to
    # the value returned here; train() below receives the returned one.
    impression_index, user_index, user_browsed_test, all_user_test, all_candidate_test, all_label_test,\
        user_index_test, user_browsed_len_test = preprocess_test_user_data('../../data/MINDsmall_dev/behaviors.tsv', user_index)

    # Convert the embedding matrix built from word_index to a float tensor.
    pretrained_embedding = torch.from_numpy(
        get_embedding_matrix(word_index)).float()
    dataset = MyDataset(all_browsed, all_candidate, all_user, all_browsed_len,
                        all_label)
    # Shuffled batches; batch size and worker count come from Config.
    train_data = DataLoader(dataset=dataset,
                            batch_size=Config.batch_size,
                            shuffle=True,
                            num_workers=Config.num_workers)
    model = LSTUR(Config, pretrained_embedding).to(device)
    # train() also receives the test-side data (presumably for evaluation
    # during training -- confirm in train()).
    train(model, train_data, all_news_test, news_index_test, impression_index,
          user_index, user_browsed_test, all_user_test, all_candidate_test,
          all_label_test, user_browsed_len_test)