def get_train_input(news_index, news_title):
    """Assemble title-only training arrays from the MIND-small train behaviors.

    Args:
        news_index: Lookup from a news identifier (as found in the behaviors
            data) to the position of that news item in ``news_title``.
        news_title: Indexable collection of encoded titles, one per news item.

    Returns:
        A tuple ``(all_browsed_title, all_candidate_title, all_label)``:

        * ``all_browsed_title`` -- int32 array of shape
          ``(num_samples, MAX_BROWSED, MAX_TITLE_LENGTH)``. Only the first
          ``MAX_BROWSED`` browsed items of each sample are kept; unused
          slots stay zero.
        * ``all_candidate_title`` -- array built from the encoded titles of
          every candidate of every sample.
        * ``all_label`` -- the labels converted with ``np.array``.
    """
    histories, _clicked, _unclicked, candidates, labels = preprocess_user_data(
        '../../data/MINDsmall_train/behaviors.tsv')
    print('preprocessing trainning input...')

    sample_count = len(histories)
    browsed_titles = np.zeros(
        (sample_count, MAX_BROWSED, MAX_TITLE_LENGTH), dtype='int32')
    for row, history in enumerate(histories):
        for slot, news_id in enumerate(history):
            if slot >= MAX_BROWSED:
                # Everything past the cap is ignored.
                break
            browsed_titles[row, slot] = news_title[news_index[news_id]]

    candidate_rows = []
    for impression in candidates:
        candidate_rows.append(
            [news_title[news_index[news_id]] for news_id in impression])
    candidate_titles = np.array(candidate_rows)

    label_array = np.array(labels)
    return browsed_titles, candidate_titles, label_array
def get_train_input(news_index, news_title, news_category):
    """Assemble title and topic-label training arrays from the MIND-small train behaviors.

    Args:
        news_index: Lookup from a news identifier (as found in the behaviors
            data) to the position of that news item in ``news_title`` and
            ``news_category``.
        news_title: Indexable collection of encoded titles, one per news item.
        news_category: Indexable collection of category ids, one per news item.

    Returns:
        A tuple ``(all_browsed_title, all_candidate_title, all_label,
        all_topic_label)``:

        * ``all_browsed_title`` -- int32 array of shape
          ``(num_samples, Config.max_browsed, Config.max_title_len)``; only
          the first ``Config.max_browsed`` browsed items of each sample are
          kept and unused slots stay zero.
        * ``all_candidate_title`` -- array built from the encoded titles of
          every candidate of every sample.
        * ``all_label`` -- the labels converted with ``np.array``.
        * ``all_topic_label`` -- int32 array of shape
          ``(num_samples, Config.max_browsed + 1 + Config.neg_sample, 1)``
          whose first ``Config.max_browsed`` slots hold the category of the
          corresponding browsed item.
    """
    all_browsed_news, _all_click, _all_unclick, all_candidate, all_label = preprocess_user_data(
        '../../data/MINDsmall_train/behaviors.tsv')
    print('preprocessing trainning input...')

    num_samples = len(all_browsed_news)
    all_browsed_title = np.zeros(
        (num_samples, Config.max_browsed, Config.max_title_len), dtype='int32')
    # NOTE(review): the trailing ``1 + Config.neg_sample`` slots are left at
    # zero, exactly as before; presumably they are reserved for the candidate
    # news -- confirm whether they should be filled with candidate categories.
    all_topic_label = np.zeros(
        (num_samples, Config.max_browsed + 1 + Config.neg_sample, 1), dtype='int32')

    for i, user_browsed in enumerate(all_browsed_news):
        # enumerate() drives the slot index so it can never be forgotten: the
        # previous topic loop never advanced ``j`` and therefore wrote every
        # category into slot 0.
        for j, news in enumerate(user_browsed):
            if j >= Config.max_browsed:
                break
            row = news_index[news]
            all_browsed_title[i, j] = news_title[row]
            all_topic_label[i, j] = news_category[row]

    all_candidate_title = np.array(
        [[news_title[news_index[news]] for news in candidates]
         for candidates in all_candidate])
    all_label = np.array(all_label)
    return all_browsed_title, all_candidate_title, all_label, all_topic_label
def get_train_input(news_title, news_index, news_entity):
    """Assemble title+entity training arrays from the MIND-small train behaviors.

    Args:
        news_title: Indexable collection of encoded titles, one per news item.
        news_index: Lookup from a news identifier (as found in the behaviors
            data) to the position of that news item in ``news_title`` and
            ``news_entity``.
        news_entity: Indexable collection of encoded entities, one per news
            item.

    Returns:
        A tuple ``(all_browsed, all_candidate, all_label)``:

        * ``all_browsed`` -- int32 array of shape ``(num_samples,
          MAX_BROWSED, MAX_TITLE_LENGTH + MAX_ENTITY_LENGTH)``: title
          features followed by entity features on the last axis. Only the
          first ``MAX_BROWSED`` browsed items are kept; unused slots stay
          zero.
        * ``all_candidate`` -- title and entity features of the FIRST
          candidate of every sample, concatenated on the last axis.
        * ``all_label`` -- the labels converted with ``np.array``.
    """
    histories, candidates, labels = preprocess_user_data(
        '../../data/MINDsmall_train/behaviors.tsv')

    def _padded_history(feature_table, width):
        # Zero-padded (sample, slot, feature) block for one feature table.
        block = np.zeros((len(histories), MAX_BROWSED, width), dtype='int32')
        for row, history in enumerate(histories):
            for slot, news_id in enumerate(history):
                if slot >= MAX_BROWSED:
                    break
                block[row, slot] = feature_table[news_index[news_id]]
        return block

    def _first_candidate(feature_table):
        # Only element 0 of each candidate entry is looked up.
        return np.array(
            [feature_table[news_index[entry[0]]] for entry in candidates])

    browsed_title = _padded_history(news_title, MAX_TITLE_LENGTH)
    browsed_entity = _padded_history(news_entity, MAX_ENTITY_LENGTH)
    browsed = np.concatenate((browsed_title, browsed_entity), axis=-1)

    candidate_title = _first_candidate(news_title)
    candidate_entity = _first_candidate(news_entity)
    candidate = np.concatenate((candidate_title, candidate_entity), axis=-1)

    return browsed, candidate, np.array(labels)
def get_train_input(news_title, news_index, news_entity):
    """Assemble title+entity training arrays from the MIND-small train behaviors.

    Args:
        news_title: Indexable collection of encoded titles, one per news item.
        news_index: Lookup from a news identifier (as found in the behaviors
            data) to the position of that news item in ``news_title`` and
            ``news_entity``.
        news_entity: Indexable collection of encoded entities, one per news
            item.

    Returns:
        A tuple ``(all_browsed, all_candidate, all_label)``:

        * ``all_browsed`` -- int32 array whose last axis holds the title
          features followed by the entity features of each browsed item.
          Only the first ``Config.max_browsed`` browsed items are kept;
          unused slots stay zero.
        * ``all_candidate`` -- title and entity features of every candidate
          of every sample, concatenated on the last axis.
        * ``all_label`` -- the labels converted with ``np.array``.
    """
    histories, _clicked, _unclicked, candidates, labels = preprocess_user_data(
        '../../data/MINDsmall_train/behaviors.tsv')

    def _padded_history(feature_table):
        # NOTE(review): both the title and the entity block are allocated
        # with Config.max_title_len as their last dimension -- confirm that
        # entity vectors really share the title length.
        block = np.zeros(
            (len(histories), Config.max_browsed, Config.max_title_len),
            dtype='int32')
        for row, history in enumerate(histories):
            for slot, news_id in enumerate(history):
                if slot >= Config.max_browsed:
                    break
                block[row, slot] = feature_table[news_index[news_id]]
        return block

    def _candidate_features(feature_table):
        # One feature vector per candidate, grouped per sample.
        return np.array([
            [feature_table[news_index[news_id]] for news_id in impression]
            for impression in candidates
        ])

    browsed_title = _padded_history(news_title)
    browsed_entity = _padded_history(news_entity)
    browsed = np.concatenate((browsed_title, browsed_entity), axis=-1)

    candidate_title = _candidate_features(news_title)
    candidate_entity = _candidate_features(news_entity)
    candidate = np.concatenate((candidate_title, candidate_entity), axis=-1)

    return browsed, candidate, np.array(labels)
def get_train_input(news_category, news_subcategory, news_abstract, news_title, news_index):
    """Assemble multi-view training arrays from the MIND-small train behaviors.

    For both browsed and candidate news the features are concatenated on the
    last axis in the order: title, abstract, category, subcategory.

    Args:
        news_category: Indexable collection of category features, one per
            news item.
        news_subcategory: Indexable collection of subcategory features, one
            per news item.
        news_abstract: Indexable collection of encoded abstracts, one per
            news item.
        news_title: Indexable collection of encoded titles, one per news item.
        news_index: Lookup from a news identifier (as found in the behaviors
            data) to the position of that news item in the collections above.

    Returns:
        A tuple ``(all_browsed, all_candidate, all_label)``:

        * ``all_browsed`` -- int32 array of shape ``(num_samples,
          MAX_BROWSED, MAX_TITLE_LENGTH + MAX_ABSTRACT_LENGTH + 2)``. Only
          the first ``MAX_BROWSED`` browsed items are kept; unused slots stay
          zero.
        * ``all_candidate`` -- the same four features for every candidate of
          every sample, concatenated on the last axis.
        * ``all_label`` -- the labels converted with ``np.array``.
    """
    histories, _clicked, _unclicked, candidates, labels = preprocess_user_data(
        '../../data/MINDsmall_train/behaviors.tsv')

    def _padded_history(feature_table, width):
        # Zero-padded (sample, slot, feature) block for one feature table.
        block = np.zeros((len(histories), MAX_BROWSED, width), dtype='int32')
        for row, history in enumerate(histories):
            for slot, news_id in enumerate(history):
                if slot >= MAX_BROWSED:
                    break
                block[row, slot] = feature_table[news_index[news_id]]
        return block

    def _candidate_features(feature_table):
        # One feature entry per candidate, grouped per sample.
        return np.array([
            [feature_table[news_index[news_id]] for news_id in impression]
            for impression in candidates
        ])

    browsed = np.concatenate(
        (
            _padded_history(news_title, MAX_TITLE_LENGTH),
            _padded_history(news_abstract, MAX_ABSTRACT_LENGTH),
            _padded_history(news_category, 1),
            _padded_history(news_subcategory, 1),
        ),
        axis=-1)

    candidate = np.concatenate(
        (
            _candidate_features(news_title),
            _candidate_features(news_abstract),
            _candidate_features(news_category),
            _candidate_features(news_subcategory),
        ),
        axis=-1)

    return browsed, candidate, np.array(labels)