def get_test_input(news_r_test, news_index_test):
    """Assemble the test-time model inputs from the MINDsmall dev behaviors.

    Args:
        news_r_test: Indexable collection of news representations. Each entry
            is written into a 300-wide float32 slot, so it is presumably a
            300-dimensional vector (TODO confirm against the news encoder).
        news_index_test: Mapping from a news id to its position in
            ``news_r_test``.

    Returns:
        Tuple ``(user_browsed_news_test, all_candidate_news_test,
        all_label_test, impression_index)`` where the first three items are
        NumPy arrays and ``impression_index`` is passed through unchanged
        from ``preprocess_test_user_data``.
    """
    (impression_index, browsed_histories,
     candidates, labels) = preprocess_test_user_data(
        '../../data/MINDsmall_dev/behaviors.tsv')

    def represent(news_id):
        # Resolve a news id to its precomputed representation.
        return news_r_test[news_index_test[news_id]]

    user_browsed_news_test = np.zeros(
        (len(browsed_histories), MAX_BROWSED, 300), dtype='float32')
    for row, history in enumerate(browsed_histories):
        # zip() stops after MAX_BROWSED items, so longer histories are
        # truncated and shorter ones leave the remaining slots zero-filled.
        for slot, news_id in zip(range(MAX_BROWSED), history):
            user_browsed_news_test[row][slot] = represent(news_id)

    # Only the first element of every candidate entry is looked up.
    all_candidate_news_test = np.array(
        [represent(candidate[0]) for candidate in candidates])
    all_label_test = np.array(labels)

    return (user_browsed_news_test, all_candidate_news_test,
            all_label_test, impression_index)
def get_test_input(news_r_test, news_index_test):
    """Assemble the test-time model inputs from the MINDsmall dev behaviors.

    The width of a browsed-news slot is
    ``Config.num_filters * len(Config.window_size)``; entries of
    ``news_r_test`` are presumably vectors of that width (TODO confirm).

    Args:
        news_r_test: Indexable collection of news representations.
        news_index_test: Mapping from a news id to its position in
            ``news_r_test``.

    Returns:
        Tuple ``(user_browsed_news_test, all_candidate_news_test,
        all_label_test, impression_index)`` where the first three items are
        NumPy arrays and ``impression_index`` is passed through unchanged
        from ``preprocess_test_user_data``.
    """
    (impression_index, browsed_histories,
     candidates, labels) = preprocess_test_user_data(
        '../../data/MINDsmall_dev/behaviors.tsv')

    history_cap = Config.max_browsed
    repr_width = Config.num_filters * len(Config.window_size)

    def represent(news_id):
        # Resolve a news id to its precomputed representation.
        return news_r_test[news_index_test[news_id]]

    user_browsed_news_test = np.zeros(
        (len(browsed_histories), history_cap, repr_width), dtype='float32')
    for row, history in enumerate(browsed_histories):
        # zip() stops after history_cap items, so longer histories are
        # truncated and shorter ones leave the remaining slots zero-filled.
        for slot, news_id in zip(range(history_cap), history):
            user_browsed_news_test[row][slot] = represent(news_id)

    # Only the first element of every candidate entry is looked up.
    all_candidate_news_test = np.array(
        [represent(candidate[0]) for candidate in candidates])
    all_label_test = np.array(labels)

    return (user_browsed_news_test, all_candidate_news_test,
            all_label_test, impression_index)
def get_test_input(news_index_test, news_r_test):
    """Assemble the test-time model inputs from the MINDsmall dev behaviors.

    Args:
        news_index_test: Mapping from a news id to its position in
            ``news_r_test``.
        news_r_test: Indexable collection of news representations. Each entry
            is written into a slot of width ``config.embedding_dim``, so it
            is presumably a vector of that width (TODO confirm).

    Returns:
        Tuple ``(impression_index, user_index, user_browsed_title_test,
        all_user_test, all_candidate_title_test, all_label_test)``.
        ``impression_index``, ``user_index`` and ``all_user_test`` are passed
        through unchanged from ``preprocess_test_user_data``; the remaining
        items are NumPy arrays built here.
    """
    (impression_index, user_index, browsed_histories,
     all_user_test, candidates, labels) = preprocess_test_user_data(
        '../../data/MINDsmall_dev/behaviors.tsv')
    print('preprocessing testing input...')

    history_cap = config.max_browsed

    def represent(news_id):
        # Resolve a news id to its precomputed representation.
        return news_r_test[news_index_test[news_id]]

    user_browsed_title_test = np.zeros(
        (len(browsed_histories), history_cap, config.embedding_dim),
        dtype='float32')
    for row, history in enumerate(browsed_histories):
        # zip() stops after history_cap items, so longer histories are
        # truncated and shorter ones leave the remaining slots zero-filled.
        for slot, news_id in zip(range(history_cap), history):
            user_browsed_title_test[row][slot] = represent(news_id)

    # Only the first element of every candidate entry is looked up.
    all_candidate_title_test = np.array(
        [represent(candidate[0]) for candidate in candidates])
    all_label_test = np.array(labels)

    return (impression_index, user_index, user_browsed_title_test,
            all_user_test, all_candidate_title_test, all_label_test)
def get_test_input(news_index_test, news_r_test):
    """Assemble the test-time model inputs from the MINDsmall dev behaviors.

    Args:
        news_index_test: Mapping from a news id to its position in
            ``news_r_test``.
        news_r_test: Indexable collection of news representations. Each entry
            is written into a 256-wide float32 slot, so it is presumably a
            256-dimensional vector (TODO confirm against the news encoder).

    Returns:
        Tuple ``(impression_index, user_index, user_browsed_title_test,
        all_user_test, all_candidate_title_test, all_label_test)``.
        ``impression_index``, ``user_index`` and ``all_user_test`` are passed
        through unchanged from ``preprocess_test_user_data``; the remaining
        items are NumPy arrays built here.
    """
    (impression_index, user_index, browsed_histories,
     all_user_test, candidates, labels) = preprocess_test_user_data(
        '../../data/MINDsmall_dev/behaviors.tsv')
    print('preprocessing testing input...')

    def represent(news_id):
        # Resolve a news id to its precomputed representation.
        return news_r_test[news_index_test[news_id]]

    user_browsed_title_test = np.zeros(
        (len(browsed_histories), MAX_BROWSED, 256), dtype='float32')
    for row, history in enumerate(browsed_histories):
        # zip() stops after MAX_BROWSED items, so longer histories are
        # truncated and shorter ones leave the remaining slots zero-filled.
        for slot, news_id in zip(range(MAX_BROWSED), history):
            user_browsed_title_test[row][slot] = represent(news_id)

    # Only the first element of every candidate entry is looked up.
    all_candidate_title_test = np.array(
        [represent(candidate[0]) for candidate in candidates])
    all_label_test = np.array(labels)

    return (impression_index, user_index, user_browsed_title_test,
            all_user_test, all_candidate_title_test, all_label_test)
user_id_test = np.array(all_user_test) all_candidate_news_test = np.array( [news_r_test[news_index_test[i[0]]] for i in all_candidate_test]) all_label_test = np.array(all_label_test) return user_browsed_news_test, user_id_test, all_candidate_news_test, all_label_test, impression_index, user_index, all_user_test if __name__ == "__main__": word_index, category_map, subcategory_map, news_category, news_subcategory, news_title, news_index, news_index_test, all_news_test = preprocess_news_data( '../../data/MINDsmall_train/news.tsv', '../../data/MINDsmall_dev/news.tsv') print('Preprocessing trainning input...') all_browsed, all_candidate, all_label, all_user, user_index = get_train_input( news_category, news_subcategory, news_title, news_index) impression_index, user_index, user_browsed_test, all_user_test, all_candidate_test, all_label_test, user_index_test = preprocess_test_user_data( '../../data/MINDsmall_dev/behaviors.tsv', user_index) news_encoder, user_encoder, model, model_test = build_model( word_index, category_map, subcategory_map, user_index, TYPE) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc']) # from tensorflow.keras.utils import plot_model # plot_model(model, to_file='model.png', show_shapes=True) # plot_model(model_test, to_file='model_test.png', show_shapes=True) # plot_model(news_encoder, to_file='news_encoder.png', show_shapes=True) # plot_model(user_encoder, to_file='user_encoder.png', show_shapes=True) train_data = {} train_data['browsed'] = np.array(all_browsed)
print('ndcg5: ', ndcg5) print('ndcg10: ', ndcg10) if __name__ == "__main__": news_index = np.load('news/news_index.npy', allow_pickle=True).item() news_index_test = np.load('news/news_index_test.npy', allow_pickle=True).item() word_index = np.load('news/word_index.npy', allow_pickle=True).item() news_title = np.load('news/news_title.npy', allow_pickle=True) news_category = np.load('news/news_category.npy', allow_pickle=True) news_subcategory = np.load('news/news_subcategory.npy', allow_pickle=True) all_news_test = np.load('news/all_news_test.npy', allow_pickle=True) all_browsed, all_candidate, all_label, all_user, user_index, all_browsed_len = get_train_input( news_category, news_subcategory, news_title, news_index) impression_index, user_index, user_browsed_test, all_user_test, all_candidate_test, all_label_test,\ user_index_test, user_browsed_len_test = preprocess_test_user_data('../../data/MINDsmall_dev/behaviors.tsv', user_index) pretrained_embedding = torch.from_numpy( get_embedding_matrix(word_index)).float() dataset = MyDataset(all_browsed, all_candidate, all_user, all_browsed_len, all_label) train_data = DataLoader(dataset=dataset, batch_size=Config.batch_size, shuffle=True, num_workers=Config.num_workers) model = LSTUR(Config, pretrained_embedding).to(device) train(model, train_data, all_news_test, news_index_test, impression_index, user_index, user_browsed_test, all_user_test, all_candidate_test, all_label_test, user_browsed_len_test)