Exemplo n.º 1
0
def get_train_input(news_index, news_title):
    """Assemble the title-based training arrays from the training behaviors file.

    Args:
        news_index: Lookup indexed by news id; its result is used to index
            ``news_title``.
        news_title: Per-news title rows, indexed by values from ``news_index``.

    Returns:
        Tuple ``(all_browsed_title, all_candidate_title, all_label)``:
        ``all_browsed_title`` is an int32 array of shape
        ``(n_samples, MAX_BROWSED, MAX_TITLE_LENGTH)`` whose unused history
        slots stay zero; ``all_candidate_title`` and ``all_label`` are the
        candidate titles and the labels converted with ``np.array``.
    """
    behaviors_path = '../../data/MINDsmall_train/behaviors.tsv'
    histories, _clicked, _unclicked, candidates, labels = preprocess_user_data(
        behaviors_path)
    print('preprocessing trainning input...')

    def title_of(news_id):
        # Two-step lookup: news id -> row number -> title row.
        return news_title[news_index[news_id]]

    browsed_shape = (len(histories), MAX_BROWSED, MAX_TITLE_LENGTH)
    all_browsed_title = np.zeros(browsed_shape, dtype='int32')
    for row, history in enumerate(histories):
        # Pairing with range() keeps at most the first MAX_BROWSED items.
        for slot, news_id in zip(range(MAX_BROWSED), history):
            all_browsed_title[row, slot] = title_of(news_id)

    all_candidate_title = np.array(
        [[title_of(news_id) for news_id in group] for group in candidates])
    return all_browsed_title, all_candidate_title, np.array(labels)
Exemplo n.º 2
0
def get_train_input(news_index, news_title, news_category):
    """Assemble title arrays and topic labels from the training behaviors file.

    Args:
        news_index: Lookup indexed by news id; its result is used to index
            ``news_title`` and ``news_category``.
        news_title: Per-news title rows, indexed by values from ``news_index``.
        news_category: Per-news category values, indexed the same way.

    Returns:
        Tuple ``(all_browsed_title, all_candidate_title, all_label,
        all_topic_label)``:

        * ``all_browsed_title``: int32, shape
          ``(n_samples, Config.max_browsed, Config.max_title_len)``; unused
          history slots stay zero.
        * ``all_candidate_title``: candidate titles converted with ``np.array``.
        * ``all_label``: labels converted with ``np.array``.
        * ``all_topic_label``: int32, shape
          ``(n_samples, Config.max_browsed + 1 + Config.neg_sample, 1)``.
          Slot ``j`` (``j < Config.max_browsed``) holds the category of the
          ``j``-th browsed news. The trailing ``1 + Config.neg_sample`` slots
          are left at zero, exactly as before.
          NOTE(review): those trailing slots presumably correspond to the
          candidates -- confirm whether they should be filled as well.
    """
    all_browsed_news, all_click, all_unclick, all_candidate, all_label = preprocess_user_data('../../data/MINDsmall_train/behaviors.tsv')
    print('preprocessing trainning input...')

    n_samples = len(all_browsed_news)
    all_browsed_title = np.zeros((n_samples, Config.max_browsed, Config.max_title_len), dtype='int32')
    all_topic_label = np.zeros((n_samples, Config.max_browsed + 1 + Config.neg_sample, 1), dtype='int32')
    for i, user_browsed in enumerate(all_browsed_news):
        # Pairing with range() keeps at most the first Config.max_browsed
        # items and gives each one its own slot j. The previous topic-label
        # loop never advanced j, so every category was written to slot 0 and
        # only the last browsed item's category survived.
        for j, news in zip(range(Config.max_browsed), user_browsed):
            row = news_index[news]
            all_browsed_title[i][j] = news_title[row]
            all_topic_label[i][j] = news_category[row]

    all_candidate_title = np.array([[news_title[news_index[j]] for j in i] for i in all_candidate])
    all_label = np.array(all_label)
    return all_browsed_title, all_candidate_title, all_label, all_topic_label
Exemplo n.º 3
0
def get_train_input(news_title, news_index, news_entity):
    """Assemble combined title+entity training arrays from the behaviors file.

    Args:
        news_title: Per-news title rows, indexed by values from ``news_index``.
        news_index: Lookup indexed by news id.
        news_entity: Per-news entity rows, indexed by values from
            ``news_index``.

    Returns:
        Tuple ``(all_browsed, all_candidate, all_label)``:

        * ``all_browsed``: int32, shape ``(n_samples, MAX_BROWSED,
          MAX_TITLE_LENGTH + MAX_ENTITY_LENGTH)`` -- titles and entities
          joined on the last axis; unused history slots stay zero.
        * ``all_candidate``: title and entity rows of the *first* news of
          each candidate group, joined on the last axis.
        * ``all_label``: labels converted with ``np.array``.
    """
    histories, candidates, labels = preprocess_user_data('../../data/MINDsmall_train/behaviors.tsv')
    n_samples = len(histories)

    all_browsed_title = np.zeros((n_samples, MAX_BROWSED, MAX_TITLE_LENGTH), dtype='int32')
    all_browsed_entity = np.zeros((n_samples, MAX_BROWSED, MAX_ENTITY_LENGTH), dtype='int32')
    for row, history in enumerate(histories):
        # Pairing with range() keeps at most the first MAX_BROWSED items.
        for slot, news_id in zip(range(MAX_BROWSED), history):
            news_row = news_index[news_id]
            all_browsed_title[row, slot] = news_title[news_row]
            all_browsed_entity[row, slot] = news_entity[news_row]
    all_browsed = np.concatenate((all_browsed_title, all_browsed_entity), axis=-1)

    # Only element 0 of every candidate group is used in this variant.
    first_rows = [news_index[group[0]] for group in candidates]
    candidate_title = np.array([news_title[r] for r in first_rows])
    candidate_entity = np.array([news_entity[r] for r in first_rows])
    all_candidate = np.concatenate((candidate_title, candidate_entity), axis=-1)

    return all_browsed, all_candidate, np.array(labels)
Exemplo n.º 4
0
def get_train_input(news_title, news_index, news_entity):
    """Assemble combined title+entity training arrays from the behaviors file.

    Args:
        news_title: Per-news title rows, indexed by values from ``news_index``.
        news_index: Lookup indexed by news id.
        news_entity: Per-news entity rows, indexed by values from
            ``news_index``.

    Returns:
        Tuple ``(all_browsed, all_candidate, all_label)``:

        * ``all_browsed``: int32, shape ``(n_samples, Config.max_browsed,
          2 * Config.max_title_len)`` -- title and entity tables joined on
          the last axis; unused history slots stay zero.
        * ``all_candidate``: title and entity rows of every news in every
          candidate group, joined on the last axis.
        * ``all_label``: labels converted with ``np.array``.
    """
    histories, _clicked, _unclicked, candidates, labels = preprocess_user_data('../../data/MINDsmall_train/behaviors.tsv')

    def history_table(feature):
        # One zero-padded int32 table per feature, at most
        # Config.max_browsed history items per sample.
        # NOTE(review): the entity table is also sized with
        # Config.max_title_len -- confirm entity rows really have that width.
        table = np.zeros((len(histories), Config.max_browsed, Config.max_title_len), dtype='int32')
        for row, history in enumerate(histories):
            for slot, news_id in zip(range(Config.max_browsed), history):
                table[row, slot] = feature[news_index[news_id]]
        return table

    def candidate_table(feature):
        return np.array([[feature[news_index[news_id]] for news_id in group] for group in candidates])

    all_browsed = np.concatenate(
        (history_table(news_title), history_table(news_entity)), axis=-1)
    all_candidate = np.concatenate(
        (candidate_table(news_title), candidate_table(news_entity)), axis=-1)
    return all_browsed, all_candidate, np.array(labels)
Exemplo n.º 5
0
Arquivo: NAML.py Projeto: nnnyt/MIND
def get_train_input(news_category, news_subcategory, news_abstract, news_title,
                    news_index):
    """Assemble the multi-feature training arrays from the behaviors file.

    Title, abstract, category and subcategory are looked up for every browsed
    and candidate news and joined, in that order, on the last axis.

    Args:
        news_category: Per-news category values, indexed by values from
            ``news_index``.
        news_subcategory: Per-news subcategory values, indexed the same way.
        news_abstract: Per-news abstract rows, indexed the same way.
        news_title: Per-news title rows, indexed the same way.
        news_index: Lookup indexed by news id.

    Returns:
        Tuple ``(all_browsed, all_candidate, all_label)``:

        * ``all_browsed``: int32, shape ``(n_samples, MAX_BROWSED,
          MAX_TITLE_LENGTH + MAX_ABSTRACT_LENGTH + 2)``; unused history slots
          stay zero.
        * ``all_candidate``: the same four features for every news of every
          candidate group, joined on the last axis.
        * ``all_label``: labels converted with ``np.array``.
    """
    histories, _clicked, _unclicked, candidates, labels = preprocess_user_data(
        '../../data/MINDsmall_train/behaviors.tsv')

    def history_table(feature, width):
        # Zero-padded int32 table holding at most MAX_BROWSED history items
        # per sample.
        table = np.zeros((len(histories), MAX_BROWSED, width), dtype='int32')
        for row, history in enumerate(histories):
            for slot, news_id in zip(range(MAX_BROWSED), history):
                table[row, slot] = feature[news_index[news_id]]
        return table

    def candidate_table(feature):
        return np.array([[feature[news_index[news_id]] for news_id in group]
                         for group in candidates])

    browsed_parts = (
        history_table(news_title, MAX_TITLE_LENGTH),
        history_table(news_abstract, MAX_ABSTRACT_LENGTH),
        # Category and subcategory occupy a single trailing column each.
        history_table(news_category, 1),
        history_table(news_subcategory, 1),
    )
    all_browsed = np.concatenate(browsed_parts, axis=-1)

    candidate_parts = (
        candidate_table(news_title),
        candidate_table(news_abstract),
        candidate_table(news_category),
        candidate_table(news_subcategory),
    )
    all_candidate = np.concatenate(candidate_parts, axis=-1)

    return all_browsed, all_candidate, np.array(labels)