def build_attention2_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50,
                           embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen
    logger = get_logger('Build attention pooling model')
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, "
                "lstm_units = %s, drop rate = %s, l2 = %s" %
                (N, L, embedd_dim, opts.lstm_units, opts.dropout, opts.l2_value))

    model = Sequential()
    model.add(Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N * L,
                        weights=embedding_weights, name='x'))
    model.add(Dropout(opts.dropout, name='drop_x'))
    model.add(Reshape((N, L, embedd_dim), name='resh_W'))
    model.add(TimeDistributed(LSTM(opts.lstm_units, return_sequences=True), name='z'))
    model.add(TimeDistributed(Attention(name='att_z')))
    model.add(LSTM(opts.lstm_units, return_sequences=True, name='hz'))
    model.add(Attention(name='attent_hz'))
    model.add(Dense(output_dim=1, activation='sigmoid', name='output'))

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
def build_attention2_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50,
                           embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen
    logger = get_logger('Build attention pooling model')
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, "
                "lstm_units = %s, drop rate = %s, l2 = %s" %
                (N, L, embedd_dim, opts.lstm_units, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N * L, ), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N * L,
                  weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)
    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True), name='z')(resh_W)
    att_z = TimeDistributed(Attention(name='att_z'))(z)
    hz = LSTM(opts.lstm_units, return_sequences=True, name='hz')(att_z)
    # avg_h = MeanOverTime(mask_zero=True, name='avg_h')(hz)
    # avg_hz = GlobalAveragePooling1D(name='avg_hz')(hz)
    attent_hz = Attention(name='attent_hz')(hz)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(attent_hz)

    model = Model(input=word_input, output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
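# Minimal usage sketch (illustrative, not part of the original source): build the attention
# pooling model with an argparse-style `opts` namespace and a random embedding matrix.
# The option names mirror the attributes read above; all values and the helper name are
# placeholders, not project defaults.
def _demo_attention2_model():
    from argparse import Namespace
    opts = Namespace(lstm_units=100, dropout=0.5, l2_value=0.0, init_bias=False)
    vocab_size, N, L, dim = 4000, 50, 50, 50
    emb = [np.random.uniform(-0.05, 0.05, (vocab_size, dim)).astype('float32')]
    model = build_attention2_model(opts, vocab_size=vocab_size, maxnum=N, maxlen=L,
                                   embedd_dim=dim, embedding_weights=emb, verbose=True)
    # Training input: int32 word ids of shape (batch, N * L); targets: scores scaled to [0, 1].
    return model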
def build_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50,
                embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen

    p = Input(shape=(4, 2048), dtype='float32', name='p')
    # img_vector = Dense(name='img_vector', units=128)(p)

    word_input = Input(shape=(N * L, ), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N * L,
                  weights=embedding_weights, mask_zero=True, trainable=False, name='x')(word_input)
    x_maskedout = ZeroMaskedEntries(name='x_maskedout')(x)
    drop_x = Dropout(opts.dropout, name='drop_x')(x_maskedout)
    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    cnn_e = TimeDistributed(Conv1D(opts.nbfilters, opts.filter1_len, border_mode='valid'),
                            name='cnn_e')(resh_W)
    att_cnn_e = TimeDistributed(Attention(), name='att_cnn_e')(cnn_e)
    lstm_e = LSTM(opts.lstm_units, return_sequences=True, name='lstm_e')(att_cnn_e)

    G = CoAttention(name='essay')([lstm_e, p])
    avg = GlobalAveragePooling1D()(G)
    final_vec_drop = Dropout(rate=0.5, name='final_vec_drop')(avg)

    if opts.l2_value:
        logger.info("Use l2 regularizers, l2 value = %s" % opts.l2_value)
        y = Dense(units=1, activation='sigmoid', name='output',
                  W_regularizer=l2(opts.l2_value))(final_vec_drop)
    else:
        y = Dense(units=1, activation='sigmoid', name='output')(final_vec_drop)

    model = Model(input=[word_input, p], output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='adam')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
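# Usage sketch (illustrative, not part of the original source): this co-attention model takes
# two inputs, the flattened essay word ids of shape (batch, N * L) and pre-extracted image
# features of shape (batch, 4, 2048). Option values and the helper name are placeholders.
def _demo_coattention_model():
    from argparse import Namespace
    opts = Namespace(nbfilters=100, filter1_len=5, lstm_units=100, dropout=0.5,
                     l2_value=0.0, init_bias=False)
    vocab_size, N, L, dim = 4000, 50, 50, 50
    emb = [np.random.uniform(-0.05, 0.05, (vocab_size, dim)).astype('float32')]
    model = build_model(opts, vocab_size=vocab_size, maxnum=N, maxlen=L,
                        embedd_dim=dim, embedding_weights=emb)
    # model.fit([word_ids, image_features], scores, ...) once the data is prepared.
    return model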
def build_hrcnn_model(opts, vocab_size=0, char_vocabsize=0, maxnum=50, maxlen=50, maxcharlen=20,
                      embedd_dim=50, embedding_weights=None, verbose=False, init_mean_value=None):
    # LSTM stacked over CNN based on sentence level
    N = maxnum
    L = maxlen
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, "
                "nbfilters = %s, filter1_len = %s, drop rate = %s" %
                (N, L, embedd_dim, opts.nbfilters, opts.filter1_len, opts.dropout))

    word_input = Input(shape=(N * L, ), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N * L,
                  weights=embedding_weights, mask_zero=True, name='x')(word_input)
    x_maskedout = ZeroMaskedEntries(name='x_maskedout')(x)
    drop_x = Dropout(opts.dropout, name='drop_x')(x_maskedout)
    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    # add char-based CNN, concatenating with word embedding to compose word representation
    if opts.use_char:
        char_input = Input(shape=(N * L * maxcharlen, ), dtype='int32', name='char_input')
        xc = Embedding(output_dim=opts.char_embedd_dim, input_dim=char_vocabsize,
                       input_length=N * L * maxcharlen, mask_zero=True, name='xc')(char_input)
        xc_masked = ZeroMaskedEntries(name='xc_masked')(xc)
        drop_xc = Dropout(opts.dropout, name='drop_xc')(xc_masked)
        res_xc = Reshape((N * L, maxcharlen, opts.char_embedd_dim), name='res_xc')(drop_xc)
        cnn_xc = TimeDistributed(Conv1D(opts.char_nbfilters, opts.filter2_len, padding='valid'),
                                 name='cnn_xc')(res_xc)
        max_xc = TimeDistributed(GlobalMaxPooling1D(), name='avg_xc')(cnn_xc)
        res_xc2 = Reshape((N, L, opts.char_nbfilters), name='res_xc2')(max_xc)

        w_repr = merge([resh_W, res_xc2], mode='concat', name='w_repr')
        zcnn = TimeDistributed(Conv1D(opts.nbfilters, opts.filter1_len, padding='valid'),
                               name='zcnn')(w_repr)
    else:
        zcnn = TimeDistributed(Conv1D(opts.nbfilters, opts.filter1_len, padding='valid'),
                               name='zcnn')(resh_W)

    # pooling mode
    if opts.mode == 'mot':
        logger.info("Use mean-over-time pooling on sentence")
        avg_zcnn = TimeDistributed(GlobalAveragePooling1D(), name='avg_zcnn')(zcnn)
    elif opts.mode == 'att':
        logger.info('Use attention-pooling on sentence')
        avg_zcnn = TimeDistributed(Attention(), name='avg_zcnn')(zcnn)
    elif opts.mode == 'merged':
        logger.info('Use mean-over-time and attention-pooling together on sentence')
        avg_zcnn1 = TimeDistributed(GlobalAveragePooling1D(), name='avg_zcnn1')(zcnn)
        avg_zcnn2 = TimeDistributed(Attention(), name='avg_zcnn2')(zcnn)
        avg_zcnn = merge([avg_zcnn1, avg_zcnn2], mode='concat', name='avg_zcnn')
    else:
        raise NotImplementedError

    hz_lstm = LSTM(opts.lstm_units, return_sequences=True, name='hz_lstm')(avg_zcnn)

    if opts.mode == 'mot':
        logger.info('Use mean-over-time pooling on text')
        avg_hz_lstm = GlobalAveragePooling1D(name='avg_hz_lstm')(hz_lstm)
    elif opts.mode == 'att':
        logger.info('Use attention-pooling on text')
        avg_hz_lstm = Attention(name='avg_hz_lstm')(hz_lstm)
    elif opts.mode == 'merged':
        logger.info('Use mean-over-time and attention-pooling together on text')
        avg_hz_lstm1 = GlobalAveragePooling1D(name='avg_hz_lstm1')(hz_lstm)
        avg_hz_lstm2 = Attention(name='avg_hz_lstm2')(hz_lstm)
        avg_hz_lstm = merge([avg_hz_lstm1, avg_hz_lstm2], mode='concat', name='avg_hz_lstm')
    else:
        raise NotImplementedError

    if opts.l2_value:
        logger.info("Use l2 regularizers, l2 value = %s" % opts.l2_value)
        y = Dense(units=1, activation='sigmoid', name='output',
                  W_regularizer=l2(opts.l2_value))(avg_hz_lstm)
    else:
        y = Dense(units=1, activation='sigmoid', name='output')(avg_hz_lstm)

    if opts.use_char:
        model = Model(inputs=[word_input, char_input], outputs=y)
    else:
        model = Model(inputs=word_input, outputs=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
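# Usage sketch (illustrative, not part of the original source): the hierarchical CNN-LSTM model
# takes word ids of shape (batch, N * L) and, when opts.use_char is set, additionally char ids
# of shape (batch, N * L * maxcharlen). Option values and the helper name are placeholders;
# the extra mode1/mode2/use_mask fields are included only so the same namespace also fits the
# later build_hrcnn_model variant defined further down.
def _demo_hrcnn_model():
    from argparse import Namespace
    opts = Namespace(nbfilters=100, filter1_len=5, lstm_units=100, dropout=0.5,
                     l2_value=0.001, init_bias=False, mode='att', use_char=0,
                     char_embedd_dim=30, char_nbfilters=20, filter2_len=3,
                     mode1='att', mode2='att', use_mask=1)
    vocab_size, N, L, dim = 4000, 50, 50, 50
    emb = [np.random.uniform(-0.05, 0.05, (vocab_size, dim)).astype('float32')]
    return build_hrcnn_model(opts, vocab_size=vocab_size, maxnum=N, maxlen=L,
                             embedd_dim=dim, embedding_weights=emb, verbose=True)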
def build_model_fusion(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50,
                       embedding_weights=None, verbose=False, init_mean_value=None):
    # p_input1 = Input(shape=(256, 256, 3), dtype='float32', name='p_input1')
    # p_input2 = Input(shape=(256, 256, 3), dtype='float32', name='p_input2')
    # p_input3 = Input(shape=(256, 256, 3), dtype='float32', name='p_input3')
    # p_input4 = Input(shape=(256, 256, 3), dtype='float32', name='p_input4')
    p = Input(shape=(256, 256, 3), dtype='float32', name='p')

    cnn_model = cnn()
    img = cnn_model(p)
    img = Reshape([6 * 6, 100])(img)
    # img1 = cnn_model(p_input1)
    # img2 = cnn_model(p_input2)
    # img3 = cnn_model(p_input3)
    # img4 = cnn_model(p_input4)
    # img1 = GlobalMaxPooling2D()(img1)
    # img2 = GlobalMaxPooling2D()(img2)
    # img3 = GlobalMaxPooling2D()(img3)
    # img4 = GlobalMaxPooling2D()(img4)
    # img = concatenate([img1, img2, img3, img4], axis=1)
    # img = Reshape((4, 100))(img)

    N = maxnum
    L = maxlen
    word_input = Input(shape=(N * L, ), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N * L,
                  weights=embedding_weights, mask_zero=True, name='x')(word_input)
    x_maskedout = ZeroMaskedEntries(name='x_maskedout')(x)
    drop_x = Dropout(opts.dropout, name='drop_x')(x_maskedout)
    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    cnn_e = TimeDistributed(Conv1D(opts.nbfilters, opts.filter1_len, border_mode='valid',
                                   activation='tanh'), name='cnn_e')(resh_W)
    cnn_e = Dropout(rate=0.5)(cnn_e)
    att_cnn_e = TimeDistributed(Attention(), name='att_cnn_e')(cnn_e)
    att_cnn_e = Dropout(rate=0.5)(att_cnn_e)
    lstm_e = LSTM(opts.lstm_units, return_sequences=True, name='lstm_e')(att_cnn_e)
    lstm_e = Dropout(rate=0.5)(lstm_e)

    G = CoAttention(name='essay')([lstm_e, img])
    avg = GlobalAveragePooling1D()(G)
    final_vec_drop = Dropout(rate=0.5, name='final_vec_drop')(avg)

    if opts.l2_value:
        logger.info("Use l2 regularizers, l2 value = %s" % opts.l2_value)
        y = Dense(units=1, activation='sigmoid', name='output',
                  W_regularizer=l2(opts.l2_value))(final_vec_drop)
    else:
        y = Dense(units=1, activation='sigmoid', name='output')(final_vec_drop)

    # model = Model(input=[word_input, p_input1, p_input2, p_input3, p_input4], output=y)
    model = Model(input=[word_input, p], output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='adam')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
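# Usage sketch (illustrative, not part of the original source): the fusion model takes the
# flattened word ids plus one raw 256x256x3 image per essay, and relies on the helper `cnn()`
# defined elsewhere in this codebase to produce a 6x6x100 feature map. Values are placeholders.
def _demo_fusion_model():
    from argparse import Namespace
    opts = Namespace(nbfilters=100, filter1_len=5, lstm_units=100, dropout=0.5,
                     l2_value=0.0, init_bias=False)
    vocab_size, N, L, dim = 4000, 50, 50, 50
    emb = [np.random.uniform(-0.05, 0.05, (vocab_size, dim)).astype('float32')]
    model = build_model_fusion(opts, vocab_size=vocab_size, maxnum=N, maxlen=L,
                               embedd_dim=dim, embedding_weights=emb)
    # model.fit([word_ids, images], scores, ...) with images of shape (batch, 256, 256, 3).
    return model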
X_train = E.reshape(E.shape[0], 68, 178 * 50, 1).astype('float32')
print(np.shape(X_train))
labeled_data = zip(E, resolved_scores)

from keras.models import Sequential, Model
from keras.layers import (Bidirectional, Conv1D, Conv2D, Input, Flatten, MaxPooling1D,
                          MaxPooling2D, TimeDistributed, LSTM, Dense,
                          GlobalAveragePooling1D, GlobalAveragePooling2D)
from softattention import Attention

# Frames, flattened height*width, channel of the image-like essay tensor.
cnn_input = Input(shape=(68, 178 * 50, 1))
conv1 = TimeDistributed(Conv1D(64, 3, activation='relu'))(cnn_input)
#conv2 = TimeDistributed(Conv2D(64, (3,3), activation='relu'))(conv1)
pool1 = TimeDistributed(MaxPooling1D(pool_size=4))(conv1)
att = TimeDistributed(Attention())(pool1)
flat = TimeDistributed(Flatten())(att)
#cnn_op = TimeDistributed(Dense(output_dim=3))(flat)
lstm = Bidirectional(LSTM(100, return_sequences=True, activation='tanh'))(flat)
bb = Flatten()(lstm)
op = Dense(1, activation='sigmoid')(bb)

fun_model = Model(inputs=[cnn_input], outputs=op)
fun_model.compile(loss='mse', optimizer='rmsprop')

y_train = resolved_scores
print(y_train)
print(np.shape(y_train))

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
X_train = E.reshape(E.shape[0], 68, 178, 50, 1).astype('float32')
print(np.shape(X_train))
labeled_data = zip(E, resolved_scores)

from keras.models import Sequential, Model
from keras.layers import (Bidirectional, Conv1D, Conv2D, Input, Flatten, MaxPooling2D,
                          TimeDistributed, LSTM, Dense,
                          GlobalAveragePooling1D, GlobalAveragePooling2D)
from softattention import Attention

# Frames, height, width, channel of the image-like essay tensor.
cnn_input = Input(shape=(68, 178, 50, 1))
conv1 = TimeDistributed(Conv2D(100, (3, 3), activation='relu'))(cnn_input)
#conv2 = TimeDistributed(Conv2D(64, (3,3), activation='relu'))(conv1)
pool1 = TimeDistributed(TimeDistributed(Attention()))(conv1)
flat = TimeDistributed(Flatten())(pool1)
#cnn_op = TimeDistributed(Dense(output_dim=3))(flat)
lstm = Bidirectional(LSTM(128, return_sequences=True, activation='tanh'))(flat)
bb = Flatten()(lstm)
op = Dense(1, activation='sigmoid')(bb)

fun_model = Model(inputs=[cnn_input], outputs=op)

from keras.utils.np_utils import to_categorical
#model = Sequential()
#model.add(Dropout(0.5,input_shape=(178,50,1)))
#model.add(TimeDistributed(Conv2D(64, kernel_size=13, activation='relu')))
#model.add(TimeDistributed(GlobalAveragePooling1D()))
#model.add(LSTM())
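# Usage sketch (assumption, not in the original source): compile the functional model above and
# fit it on a simple train/validation split. Batch size, epochs, and the helper name are
# illustrative placeholders; `X_train` and `resolved_scores` come from the earlier cells.
def _demo_train_fun_model():
    from sklearn.model_selection import train_test_split
    fun_model.compile(loss='mse', optimizer='rmsprop')
    X_tr, X_val, y_tr, y_val = train_test_split(
        X_train, np.asarray(resolved_scores), test_size=0.2, random_state=42)
    fun_model.fit(X_tr, y_tr, validation_data=(X_val, y_val), batch_size=8, epochs=5)
    return fun_model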
def build_shrcnn_model(opts, vocab_size=0, char_vocabsize=0, maxnum=50, maxlen=50, maxcnum=50,
                       maxclen=50, maxcharlen=20, embedd_dim=50, embedding_weights=None,
                       verbose=False, init_mean_value=None):
    # LSTM stacked over CNN based on sentence level
    N = maxnum
    L = maxlen
    cN = maxcnum
    cL = maxclen
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, "
                "nbfilters = %s, filter1_len = %s, drop rate = %s" %
                (N, L, embedd_dim, opts.nbfilters, opts.filter1_len, opts.dropout))

    word_input = Input(shape=(N * L, ), dtype='int32', name='word_input')
    context_input = Input(shape=(cN * cL, ), dtype='int32', name='context_input')

    emb = Embedding(output_dim=embedd_dim, input_dim=vocab_size,
                    weights=embedding_weights, mask_zero=True, name='cx')
    cx = emb(context_input)
    cx_maskedout = ZeroMaskedEntries(name='cx_maskedout')(cx)
    drop_cx = Dropout(opts.dropout, name='drop_cx')(cx_maskedout)
    resh_C = Reshape((cN, cL, embedd_dim), name='resh_C')(drop_cx)
    czcnn = TimeDistributed(Conv1D(opts.nbfilters, opts.filter1_len, padding='valid'),
                            name='czcnn')(resh_C)

    x = emb(word_input)
    x_maskedout = ZeroMaskedEntries(name='x_maskedout')(x)
    drop_x = Dropout(opts.dropout, name='drop_x')(x_maskedout)
    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    # add char-based CNN, concatenating with word embedding to compose word representation
    zcnn = TimeDistributed(Conv1D(opts.nbfilters, opts.filter1_len, padding='valid'),
                           name='zcnn')(resh_W)

    '''
    encoded_essay = Reshape((zcnn.shape[1].value*zcnn.shape[2].value, opts.nbfilters))(zcnn)
    encoded_context = Reshape((czcnn.shape[1].value*czcnn.shape[2].value, opts.nbfilters))(czcnn)

    # bidaf
    # Now we compute a similarity between the passage words and the question words, and
    # normalize the matrix in a couple of different ways for input into some more layers.
    matrix_attention_layer = MatrixAttention(name='essay_context_similarity')
    # matrix_attention_layer = LinearMatrixAttention(name='passage_question_similarity')
    # Shape: (batch_size, num_passage_words, num_question_words)
    essay_context_similarity = matrix_attention_layer([encoded_essay, encoded_context])

    # Shape: (batch_size, num_passage_words, num_question_words), normalized over question
    # words for each passage word.
    essay_context_attention = MaskedSoftmax()(essay_context_similarity)
    # Shape: (batch_size, num_passage_words, embedding_dim * 2)
    weighted_sum_layer = WeightedSum(name="essay_context_vectors", use_masking=False)
    essay_context_vectors = weighted_sum_layer([encoded_context, essay_context_attention])

    # Min's paper finds, for each document word, the most similar question word to it, and
    # computes a single attention over the whole document using these max similarities.
    # Shape: (batch_size, num_passage_words)
    context_essay_similarity = Max(axis=-1)(essay_context_similarity)
    # Shape: (batch_size, num_passage_words)
    context_essay_attention = MaskedSoftmax()(context_essay_similarity)
    # Shape: (batch_size, embedding_dim * 2)
    weighted_sum_layer = WeightedSum(name="question_passage_vector", use_masking=False)
    context_essay_vector = weighted_sum_layer([encoded_essay, context_essay_attention])

    # Then he repeats this question/passage vector for every word in the passage, and uses it
    # as an additional input to the hidden layers above.
    repeat_layer = RepeatLike(axis=1, copy_from_axis=1)
    # Shape: (batch_size, num_passage_words, embedding_dim * 2)
    tiled_context_essay_vector = repeat_layer([context_essay_vector, encoded_essay])

    complex_concat_layer = ComplexConcat(combination='1*2,1*3', name='final_merged_passage')
    final_merged_passage = complex_concat_layer([encoded_essay, essay_context_vectors,
                                                 tiled_context_essay_vector])

    complex_concat_layer = ComplexConcat(combination='1*2', name='final_merged_passage')
    final_merged_passage = complex_concat_layer([encoded_essay, essay_context_vectors])

    mcnn = Reshape((zcnn.shape[1].value, zcnn.shape[2].value, opts.nbfilters),
                   name='mcnn')(final_merged_passage)
    '''

    # pooling mode
    if opts.mode == 'mot':
        logger.info("Use mean-over-time pooling on sentence")
        avg_zcnn = TimeDistributed(GlobalAveragePooling1D(), name='avg_zcnn')(zcnn)
    elif opts.mode == 'att':
        logger.info('Use attention-pooling on sentence')
        avg_zcnn = TimeDistributed(Attention(), name='avg_zcnn')(zcnn)
        avg_czcnn = TimeDistributed(Attention(), name='avg_czcnn')(czcnn)
    elif opts.mode == 'merged':
        logger.info('Use mean-over-time and attention-pooling together on sentence')
        avg_zcnn1 = TimeDistributed(GlobalAveragePooling1D(), name='avg_zcnn1')(zcnn)
        avg_zcnn2 = TimeDistributed(Attention(), name='avg_zcnn2')(zcnn)
        avg_zcnn = merge([avg_zcnn1, avg_zcnn2], mode='concat', name='avg_zcnn')
    else:
        raise NotImplementedError

    hz_lstm = LSTM(opts.lstm_units, return_sequences=True, name='hz_lstm')(avg_zcnn)
    # Note: avg_czcnn (and hence chz_lstm) is only defined in the 'att' branch above, so this
    # model is expected to be built with opts.mode == 'att'.
    chz_lstm = LSTM(opts.lstm_units, return_sequences=True, name='chz_lstm')(avg_czcnn)

    if opts.mode == 'mot':
        logger.info('Use mean-over-time pooling on text')
        avg_hz_lstm = GlobalAveragePooling1D(name='avg_hz_lstm')(hz_lstm)
    elif opts.mode == 'att':
        logger.info('Use co-attention on text')
        # PART 2:
        # Now we compute a similarity between the passage words and the question words, and
        # normalize the matrix in a couple of different ways for input into some more layers.
        matrix_attention_layer = MatrixAttention(name='essay_context_similarity')
        # Shape: (batch_size, num_passage_words, num_question_words)
        essay_context_similarity = matrix_attention_layer([hz_lstm, chz_lstm])

        # Shape: (batch_size, num_passage_words, num_question_words), normalized over question
        # words for each passage word.
        essay_context_attention = MaskedSoftmax()(essay_context_similarity)
        weighted_sum_layer = WeightedSum(name="essay_context_vectors", use_masking=False)
        # Shape: (batch_size, num_passage_words, embedding_dim * 2)
        weighted_hz_lstm = weighted_sum_layer([chz_lstm, essay_context_attention])

        # Min's paper finds, for each document word, the most similar question word to it, and
        # computes a single attention over the whole document using these max similarities.
        # Shape: (batch_size, num_passage_words)
        context_essay_similarity = Max(axis=-1)(essay_context_similarity)
        # Shape: (batch_size, num_passage_words)
        context_essay_attention = MaskedSoftmax()(context_essay_similarity)
        # Shape: (batch_size, embedding_dim * 2)
        weighted_sum_layer = WeightedSum(name="context_essay_vector", use_masking=False)
        context_essay_vector = weighted_sum_layer([hz_lstm, context_essay_attention])

        # Then he repeats this question/passage vector for every word in the passage, and uses it
        # as an additional input to the hidden layers above.
        repeat_layer = RepeatLike(axis=1, copy_from_axis=1)
        # Shape: (batch_size, num_passage_words, embedding_dim * 2)
        tiled_context_essay_vector = repeat_layer([context_essay_vector, hz_lstm])

        complex_concat_layer = ComplexConcat(combination='1,2,1*2,1*3',
                                             name='final_merged_passage')
        final_merged_passage = complex_concat_layer([hz_lstm, weighted_hz_lstm,
                                                     tiled_context_essay_vector])

        avg_hz_lstm = LSTM(opts.lstm_units, return_sequences=False,
                           name='avg_hz_lstm')(final_merged_passage)
        # avg_hz_lstm = CoAttentionWithoutBi(name='avg_hz_lstm')([hz_lstm, weighted_hz_lstm])
        # avg_hz_lstm = Attention(name='avg_hz_lstm')(hz_lstm)
    elif opts.mode == 'merged':
        logger.info('Use mean-over-time and attention-pooling together on text')
        avg_hz_lstm1 = GlobalAveragePooling1D(name='avg_hz_lstm1')(hz_lstm)
        avg_hz_lstm2 = Attention(name='avg_hz_lstm2')(hz_lstm)
        avg_hz_lstm = merge([avg_hz_lstm1, avg_hz_lstm2], mode='concat', name='avg_hz_lstm')
    else:
        raise NotImplementedError

    if opts.l2_value:
        logger.info("Use l2 regularizers, l2 value = %s" % opts.l2_value)
        y = Dense(units=1, activation='sigmoid', name='output',
                  W_regularizer=l2(opts.l2_value))(avg_hz_lstm)
    else:
        y = Dense(units=1, activation='sigmoid', name='output')(avg_hz_lstm)

    model = Model(inputs=[word_input, context_input], outputs=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
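# Usage sketch (illustrative, not part of the original source): the source-aware model takes
# essay word ids of shape (batch, N * L) and context (source article) word ids of shape
# (batch, cN * cL); as written above it expects opts.mode == 'att' for the co-attention path.
# Option values and the helper name are placeholders.
def _demo_shrcnn_model():
    from argparse import Namespace
    opts = Namespace(nbfilters=100, filter1_len=5, lstm_units=100, dropout=0.5,
                     l2_value=0.001, init_bias=False, mode='att')
    vocab_size, N, L, cN, cL, dim = 4000, 50, 50, 30, 50, 50
    emb = [np.random.uniform(-0.05, 0.05, (vocab_size, dim)).astype('float32')]
    return build_shrcnn_model(opts, vocab_size=vocab_size, maxnum=N, maxlen=L,
                              maxcnum=cN, maxclen=cL, embedd_dim=dim,
                              embedding_weights=emb, verbose=True)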
def build_hrcnn_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50,
                      embedding_weights=None, verbose=False, init_mean_value=None):
    # LSTM stacked over CNN based on sentence level
    N = maxnum
    L = maxlen
    print(opts)
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, "
                "nbfilters = %s, filter1_len = %s, drop rate = %s" %
                (N, L, embedd_dim, opts.nbfilters, opts.filter1_len, opts.dropout))

    word_input = Input(shape=(N * L, ), dtype='int32', name='word_input')

    # embedding layer
    if opts.use_mask == 0:
        x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N * L,
                      weights=embedding_weights, mask_zero=False, name='x')(word_input)
        x_maskedout = x
    elif opts.use_mask == 1:
        x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N * L,
                      weights=embedding_weights, mask_zero=True, name='x')(word_input)
        x_maskedout = ZeroMaskedEntries(name='x_maskedout')(x)

    # drop out
    drop_x = Dropout(opts.dropout, name='drop_x')(x_maskedout)
    # reshape
    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    # CNN layer
    zcnn = TimeDistributed(Convolution1D(opts.nbfilters, opts.filter1_len, border_mode='valid'),
                           name='zcnn')(resh_W)

    # pooling mode1 on CNN
    if opts.mode1 == 'mot':
        logger.info("Use mean-over-time pooling on sentence")
        avg_zcnn = TimeDistributed(GlobalAveragePooling1D(), name='avg_zcnn')(zcnn)
    elif opts.mode1 == 'att':
        logger.info('Use attention-pooling on sentence')
        avg_zcnn = TimeDistributed(Attention(), name='avg_zcnn')(zcnn)
    elif opts.mode1 == 'merged':
        logger.info('Use mean-over-time and attention-pooling together on sentence')
        avg_zcnn1 = TimeDistributed(GlobalAveragePooling1D(),
                                    input_shape=(K.int_shape(zcnn)[2], K.int_shape(zcnn)[3]),
                                    name='avg_zcnn1')(zcnn)
        avg_zcnn2 = TimeDistributed(Attention(), name='avg_zcnn2')(zcnn)
        avg_zcnn = merge([avg_zcnn1, avg_zcnn2], mode='concat', name='avg_zcnn')
    else:
        raise NotImplementedError

    hz_lstm = LSTM(opts.lstm_units, return_sequences=True, name='hz_lstm')(avg_zcnn)

    # pooling mode2 on LSTM
    if opts.mode2 == 'mot':
        logger.info('Use mean-over-time pooling on text')
        avg_hz_lstm = GlobalAveragePooling1D(name='avg_hz_lstm')(hz_lstm)
    elif opts.mode2 == 'att':
        logger.info('Use attention-pooling on text')
        avg_hz_lstm = Attention(name='avg_hz_lstm')(hz_lstm)
    elif opts.mode2 == 'merged':
        logger.info('Use mean-over-time and attention-pooling together on text')
        avg_hz_lstm1 = GlobalAveragePooling1D(name='avg_hz_lstm1')(hz_lstm)
        avg_hz_lstm2 = Attention(name='avg_hz_lstm2')(hz_lstm)
        avg_hz_lstm = merge([avg_hz_lstm1, avg_hz_lstm2], mode='concat', name='avg_hz_lstm')
    else:
        raise NotImplementedError

    # l2 regularization
    if opts.l2_value:
        logger.info("Use l2 regularizers, l2 value = %s" % opts.l2_value)
        y = Dense(output_dim=1, activation='sigmoid', name='output',
                  kernel_regularizer=regularizers.l2(opts.l2_value))(avg_hz_lstm)
    else:
        y = Dense(output_dim=1, activation='sigmoid', name='output')(avg_hz_lstm)

    model = Model(input=word_input, output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model