Example #1
def build_attention2_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50, embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen

    logger = get_logger('Build attention pooling model')
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, lstm_units = %s, drop rate = %s, l2 = %s" % (N, L, embedd_dim,
        opts.lstm_units, opts.dropout, opts.l2_value))

    model = Sequential()
    model.add(Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N*L, weights=embedding_weights, name='x'))
    model.add(Dropout(opts.dropout, name='drop_x'))
    model.add(Reshape((N, L, embedd_dim), input_shape=(N*L,), name='resh_W'))

    model.add(TimeDistributed(LSTM(opts.lstm_units, return_sequences=True), name='z'))
    model.add(TimeDistributed(Attention(name='att_z')))
    model.add(LSTM(opts.lstm_units, return_sequences=True, name='hz'))
    model.add(Attention(name='attent_hz'))
    model.add(Dense(output_dim=1, activation='sigmoid', name='output')) 
    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].bias = bias_value
    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
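The builder above only reads a handful of attributes from `opts` (`lstm_units`, `dropout`, `l2_value`, `init_bias`). A minimal, hypothetical calling sketch; the field values, vocabulary size and embedding matrix below are illustrative placeholders, not taken from the original project:

from argparse import Namespace
import numpy as np

# Hypothetical hyper-parameters mirroring the attributes the builder reads.
opts = Namespace(lstm_units=100, dropout=0.5, l2_value=0.0, init_bias=False)

vocab_size, embedd_dim = 4000, 50
embedding_matrix = np.random.rand(vocab_size, embedd_dim)  # placeholder embeddings

model = build_attention2_model(opts,
                               vocab_size=vocab_size,
                               maxnum=50,
                               maxlen=50,
                               embedd_dim=embedd_dim,
                               embedding_weights=[embedding_matrix],  # Keras expects a list here
                               verbose=True)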
Example #2
def build_attention2_model(opts,
                           vocab_size=0,
                           maxnum=50,
                           maxlen=50,
                           embedd_dim=50,
                           embedding_weights=None,
                           verbose=False,
                           init_mean_value=None):
    N = maxnum
    L = maxlen

    logger = get_logger('Build attention pooling model')
    logger.info(
        "Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, lstm_units = %s, drop rate = %s, l2 = %s"
        % (N, L, embedd_dim, opts.lstm_units, opts.dropout, opts.l2_value))
    word_input = Input(shape=(N * L, ), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim,
                  input_dim=vocab_size,
                  input_length=N * L,
                  weights=embedding_weights,
                  name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True),
                        name='z')(resh_W)
    att_z = TimeDistributed(Attention(name='att_z'))(z)

    hz = LSTM(opts.lstm_units, return_sequences=True, name='hz')(att_z)
    # avg_h = MeanOverTime(mask_zero=True, name='avg_h')(hz)
    # avg_hz = GlobalAveragePooling1D(name='avg_hz')(hz)
    attent_hz = Attention(name='attent_hz')(hz)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(attent_hz)

    model = Model(input=word_input, output=y)
    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) -
                      np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)
    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
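Both variants initialise the output-layer bias with the log-odds of the mean score, so that before any training the sigmoid output already sits at the training mean. A small numeric sketch of that transform (the mean value is illustrative):

import numpy as np

y_mean = 0.62                                   # illustrative mean of the normalised scores
bias = np.log(y_mean) - np.log(1 - y_mean)      # log-odds: log(y_mean / (1 - y_mean))
print(1.0 / (1.0 + np.exp(-bias)))              # sigmoid(bias) recovers 0.62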
Example #3
def build_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50, embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen

    p = Input(shape=(4, 2048), dtype='float32', name='p')
    # img_vector = Dense(name='img_vector', units=128)(p)

    word_input = Input(shape=(N * L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N * L, weights=embedding_weights,
                  mask_zero=True, trainable=False, name='x')(word_input)
    x_maskedout = ZeroMaskedEntries(name='x_maskedout')(x)
    drop_x = Dropout(opts.dropout, name='drop_x')(x_maskedout)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    cnn_e = TimeDistributed(Conv1D(opts.nbfilters, opts.filter1_len, border_mode='valid'), name='cnn_e')(resh_W)

    att_cnn_e = TimeDistributed(Attention(), name='att_cnn_e')(cnn_e)

    lstm_e = LSTM(opts.lstm_units, return_sequences=True, name='lstm_e')(att_cnn_e)

    G = CoAttention(name='essay')([lstm_e, p])
    avg = GlobalAveragePooling1D()(G)
    final_vec_drop = Dropout(rate=0.5, name='final_vec_drop')(avg)
    if opts.l2_value:
        logger.info("Use l2 regularizers, l2 value = %s" % opts.l2_value)
        y = Dense(units=1, activation='sigmoid', name='output', W_regularizer=l2(opts.l2_value))(final_vec_drop)
    else:
        y = Dense(units=1, activation='sigmoid', name='output')(final_vec_drop)

    model = Model(input=[word_input, p], output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='adam')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
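This fusion builder returns a model with two inputs: the flattened word ids of length N*L and precomputed image features of shape (4, 2048). A hypothetical training call, with array names that are illustrative rather than from the original code:

# Hypothetical arrays: padded_words (num_essays, N*L) int32,
# image_feats (num_essays, 4, 2048) float32, scores (num_essays,) scaled to [0, 1].
model = build_model(opts, vocab_size=vocab_size, embedding_weights=[embedding_matrix])
model.fit([padded_words, image_feats], scores,
          batch_size=32, epochs=20, validation_split=0.1)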
Example #4
def build_hrcnn_model(opts,
                      vocab_size=0,
                      char_vocabsize=0,
                      maxnum=50,
                      maxlen=50,
                      maxcharlen=20,
                      embedd_dim=50,
                      embedding_weights=None,
                      verbose=False,
                      init_mean_value=None):
    # LSTM stacked over CNN based on sentence level
    N = maxnum
    L = maxlen

    logger.info(
        "Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, nbfilters = %s, filter1_len = %s, drop rate = %s"
        % (N, L, embedd_dim, opts.nbfilters, opts.filter1_len, opts.dropout))

    word_input = Input(shape=(N * L, ), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim,
                  input_dim=vocab_size,
                  input_length=N * L,
                  weights=embedding_weights,
                  mask_zero=True,
                  name='x')(word_input)
    x_maskedout = ZeroMaskedEntries(name='x_maskedout')(x)
    drop_x = Dropout(opts.dropout, name='drop_x')(x_maskedout)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    # add char-based CNN, concatenating with word embedding to compose word representation
    if opts.use_char:
        char_input = Input(shape=(N * L * maxcharlen, ),
                           dtype='int32',
                           name='char_input')
        xc = Embedding(output_dim=opts.char_embedd_dim,
                       input_dim=char_vocabsize,
                       input_length=N * L * maxcharlen,
                       mask_zero=True,
                       name='xc')(char_input)
        xc_masked = ZeroMaskedEntries(name='xc_masked')(xc)
        drop_xc = Dropout(opts.dropout, name='drop_xc')(xc_masked)
        res_xc = Reshape((N * L, maxcharlen, opts.char_embedd_dim),
                         name='res_xc')(drop_xc)
        cnn_xc = TimeDistributed(Conv1D(opts.char_nbfilters,
                                        opts.filter2_len,
                                        padding='valid'),
                                 name='cnn_xc')(res_xc)
        max_xc = TimeDistributed(GlobalMaxPooling1D(), name='avg_xc')(cnn_xc)
        res_xc2 = Reshape((N, L, opts.char_nbfilters), name='res_xc2')(max_xc)

        w_repr = concatenate([resh_W, res_xc2], name='w_repr')
        zcnn = TimeDistributed(Conv1D(opts.nbfilters,
                                      opts.filter1_len,
                                      padding='valid'),
                               name='zcnn')(w_repr)
    else:
        zcnn = TimeDistributed(Conv1D(opts.nbfilters,
                                      opts.filter1_len,
                                      padding='valid'),
                               name='zcnn')(resh_W)

    # pooling mode
    if opts.mode == 'mot':
        logger.info("Use mean-over-time pooling on sentence")
        avg_zcnn = TimeDistributed(GlobalAveragePooling1D(),
                                   name='avg_zcnn')(zcnn)
    elif opts.mode == 'att':
        logger.info('Use attention-pooling on sentence')
        avg_zcnn = TimeDistributed(Attention(), name='avg_zcnn')(zcnn)
    elif opts.mode == 'merged':
        logger.info(
            'Use mean-over-time and attention-pooling together on sentence')
        avg_zcnn1 = TimeDistributed(GlobalAveragePooling1D(),
                                    name='avg_zcnn1')(zcnn)
        avg_zcnn2 = TimeDistributed(Attention(), name='avg_zcnn2')(zcnn)
        avg_zcnn = concatenate([avg_zcnn1, avg_zcnn2], name='avg_zcnn')
    else:
        raise NotImplementedError
    hz_lstm = LSTM(opts.lstm_units, return_sequences=True,
                   name='hz_lstm')(avg_zcnn)

    if opts.mode == 'mot':
        logger.info('Use mean-over-time pooling on text')
        avg_hz_lstm = GlobalAveragePooling1D(name='avg_hz_lstm')(hz_lstm)
    elif opts.mode == 'att':
        logger.info('Use attention-pooling on text')
        avg_hz_lstm = Attention(name='avg_hz_lstm')(hz_lstm)
    elif opts.mode == 'merged':
        logger.info(
            'Use mean-over-time and attention-pooling together on text')
        avg_hz_lstm1 = GlobalAveragePooling1D(name='avg_hz_lstm1')(hz_lstm)
        avg_hz_lstm2 = Attention(name='avg_hz_lstm2')(hz_lstm)
        avg_hz_lstm = concatenate([avg_hz_lstm1, avg_hz_lstm2], name='avg_hz_lstm')
    else:
        raise NotImplementedError
    if opts.l2_value:
        logger.info("Use l2 regularizers, l2 value = %s" % opts.l2_value)
        y = Dense(units=1,
                  activation='sigmoid',
                  name='output',
                  W_regularizer=l2(opts.l2_value))(avg_hz_lstm)
    else:
        y = Dense(units=1, activation='sigmoid', name='output')(avg_hz_lstm)

    if opts.use_char:
        model = Model(inputs=[word_input, char_input], outputs=y)
    else:
        model = Model(inputs=word_input, outputs=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) -
                      np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
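The word input throughout these builders is a single flattened vector of N*L ids per essay: up to `maxnum` sentences, each padded or truncated to `maxlen` tokens, with 0 as the masked padding index. A minimal padding sketch, assuming `essays` is a list of essays where each essay is a list of sentences given as word-id lists (names are illustrative):

import numpy as np

def pad_essays(essays, maxnum=50, maxlen=50):
    # Returns an int32 array of shape (num_essays, maxnum * maxlen);
    # missing sentences/tokens stay 0, which the mask_zero embedding ignores.
    out = np.zeros((len(essays), maxnum, maxlen), dtype='int32')
    for i, essay in enumerate(essays):
        for j, sent in enumerate(essay[:maxnum]):
            out[i, j, :min(len(sent), maxlen)] = sent[:maxlen]
    return out.reshape(len(essays), maxnum * maxlen)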
Example #5
def build_model_fusion(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50, embedding_weights=None, verbose=False, init_mean_value=None):

    # p_input1 = Input(shape=(256, 256, 3), dtype='float32', name='p_input1')
    # p_input2 = Input(shape=(256, 256, 3), dtype='float32', name='p_input2')
    # p_input3 = Input(shape=(256, 256, 3), dtype='float32', name='p_input3')
    # p_input4 = Input(shape=(256, 256, 3), dtype='float32', name='p_input4')
    p = Input(shape=(256, 256, 3), dtype='float32', name='p')
    cnn_model = cnn()
    img = cnn_model(p)
    img = Reshape([6*6, 100])(img)
    # img1 = cnn_model(p_input1)
    # img2 = cnn_model(p_input2)
    # img3 = cnn_model(p_input3)
    # img4 = cnn_model(p_input4)
    # img1 = GlobalMaxPooling2D()(img1)
    # img2 = GlobalMaxPooling2D()(img2)
    # img3 = GlobalMaxPooling2D()(img3)
    # img4 = GlobalMaxPooling2D()(img4)

    # img = concatenate([img1, img2, img3, img4], axis=1)
    # img = Reshape((4, 100))(img)

    N = maxnum
    L = maxlen

    word_input = Input(shape=(N * L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N * L, weights=embedding_weights,
                  mask_zero=True, name='x')(word_input)
    x_maskedout = ZeroMaskedEntries(name='x_maskedout')(x)
    drop_x = Dropout(opts.dropout, name='drop_x')(x_maskedout)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    cnn_e = TimeDistributed(Conv1D(opts.nbfilters, opts.filter1_len, border_mode='valid', activation='tanh'), name='cnn_e')(resh_W)
    cnn_e = Dropout(rate=0.5)(cnn_e)
    att_cnn_e = TimeDistributed(Attention(), name='att_cnn_e')(cnn_e)
    att_cnn_e = Dropout(rate=0.5)(att_cnn_e)
    lstm_e = LSTM(opts.lstm_units, return_sequences=True, name='lstm_e')(att_cnn_e)
    lstm_e = Dropout(rate=0.5)(lstm_e)
    G = CoAttention(name='essay')([lstm_e, img])
    avg = GlobalAveragePooling1D()(G)
    final_vec_drop = Dropout(rate=0.5, name='final_vec_drop')(avg)

    if opts.l2_value:
        logger.info("Use l2 regularizers, l2 value = %s" % opts.l2_value)
        y = Dense(units=1, activation='sigmoid', name='output', W_regularizer=l2(opts.l2_value))(final_vec_drop)
    else:
        y = Dense(units=1, activation='sigmoid', name='output')(final_vec_drop)

    # model = Model(input=[word_input, p_input1, p_input2, p_input3, p_input4], output=y)
    model = Model(input=[word_input, p], output=y)
    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='adam')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
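`cnn()` is not defined in this snippet; the surrounding code only requires it to map a 256x256x3 image to a feature map that can be reshaped to (6*6, 100). A purely hypothetical sketch of a sub-model with that output shape:

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D

def cnn():
    # Hypothetical image encoder: (256, 256, 3) -> (6, 6, 100),
    # so that Reshape([6*6, 100]) in the caller is valid.
    m = Sequential()
    m.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(256, 256, 3)))
    m.add(MaxPooling2D(pool_size=(4, 4)))                             # 256 -> 64
    m.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    m.add(MaxPooling2D(pool_size=(4, 4)))                             # 64 -> 16
    m.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    m.add(MaxPooling2D(pool_size=(2, 2)))                             # 16 -> 8
    m.add(Conv2D(100, (3, 3), activation='relu', padding='valid'))    # 8 -> 6, 100 channels
    return m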
Example #6
X_train = E.reshape(E.shape[0],68,178*50,1).astype('float32')

print(np.shape(X_train))

labeled_data = zip(E, resolved_scores)


from keras.models import Sequential, Model
from keras.layers import Bidirectional, Conv1D, Input, Flatten, MaxPooling1D, TimeDistributed, LSTM, Dense, Conv2D, GlobalAveragePooling1D, GlobalAveragePooling2D
from softattention import Attention

cnn_input = Input(shape=(68, 178*50, 1))   # frames, height, width, channel of image
conv1 = TimeDistributed(Conv1D(64, 3,    activation='relu'))(cnn_input)
#conv2 = TimeDistributed(Conv2D(64, (3,3), activation='relu'))(conv1)
pool1=TimeDistributed(MaxPooling1D(pool_size=4))(conv1)
att=TimeDistributed(Attention())(pool1)
flat=TimeDistributed(Flatten())(att)
#cnn_op= TimeDistributed(Dense(output_dim=3))(flat)

lstm = Bidirectional(LSTM(100, return_sequences=True, activation='tanh'))(flat)
bb = Flatten()(lstm)
op =Dense(1, activation='sigmoid')(bb)
fun_model = Model(inputs=[cnn_input], outputs=op)

fun_model.compile(loss='mse', optimizer='rmsprop')

y_train = resolved_scores
print(y_train)
print(np.shape(y_train))
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
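The snippet stops right after importing `train_test_split` without using it; a minimal continuation sketch, assuming `X_train`, `y_train` and `fun_model` as defined above:

import numpy as np
from sklearn.model_selection import train_test_split

y_train = np.asarray(y_train, dtype='float32')   # scores as a float array
X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# epochs= is the Keras 2 name; older Keras versions use nb_epoch= instead.
fun_model.fit(X_tr, y_tr, validation_data=(X_val, y_val), batch_size=8, epochs=10)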
Example #7
X_train = E.reshape(E.shape[0], 68, 178, 50, 1).astype('float32')

print(np.shape(X_train))

labeled_data = zip(E, resolved_scores)

from keras.models import Sequential
from keras.layers import Bidirectional, Conv1D, Input, Flatten, MaxPooling2D, TimeDistributed, LSTM, Dense, Conv2D, GlobalAveragePooling1D, GlobalAveragePooling2D
from keras.models import Model
from softattention import Attention

cnn_input = Input(shape=(68, 178, 50,
                         1))  # frames, height, width, channel of image
conv1 = TimeDistributed(Conv2D(100, (3, 3), activation='relu'))(cnn_input)
#conv2 = TimeDistributed(Conv2D(64, (3,3), activation='relu'))(conv1)
pool1 = TimeDistributed(TimeDistributed(Attention()))(conv1)
flat = TimeDistributed(Flatten())(pool1)
#cnn_op= TimeDistributed(Dense(output_dim=3))(flat)

lstm = Bidirectional(LSTM(128, return_sequences=True, activation='tanh'))(flat)
bb = Flatten()(lstm)
op = Dense(1, activation='sigmoid')(bb)
fun_model = Model(inputs=[cnn_input], outputs=op)

from keras.utils.np_utils import to_categorical

#model = Sequential()
#model.add(Dropout(0.5,input_shape=(178,50,1)))
#model.add(TimeDistributed(Conv2D(64, kernel_size=13, activation='relu')))
#model.add(TimeDistributed(GlobalAveragePooling1D()))
#model.add(LSTM())
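Unlike Example #6, `fun_model` here is never compiled; a minimal continuation sketch mirroring the earlier compile call:

fun_model.compile(loss='mse', optimizer='rmsprop')
fun_model.summary()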
Example #8
def build_shrcnn_model(opts,
                       vocab_size=0,
                       char_vocabsize=0,
                       maxnum=50,
                       maxlen=50,
                       maxcnum=50,
                       maxclen=50,
                       maxcharlen=20,
                       embedd_dim=50,
                       embedding_weights=None,
                       verbose=False,
                       init_mean_value=None):
    # LSTM stacked over CNN based on sentence level
    N = maxnum
    L = maxlen

    cN = maxcnum
    cL = maxclen

    logger.info(
        "Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, nbfilters = %s, filter1_len = %s, drop rate = %s"
        % (N, L, embedd_dim, opts.nbfilters, opts.filter1_len, opts.dropout))

    word_input = Input(shape=(N * L, ), dtype='int32', name='word_input')
    context_input = Input(shape=(cN * cL, ),
                          dtype='int32',
                          name='context_input')

    emb = Embedding(output_dim=embedd_dim,
                    input_dim=vocab_size,
                    weights=embedding_weights,
                    mask_zero=True,
                    name='cx')
    cx = emb(context_input)
    cx_maskedout = ZeroMaskedEntries(name='cx_maskedout')(cx)
    drop_cx = Dropout(opts.dropout, name='drop_cx')(cx_maskedout)

    resh_C = Reshape((cN, cL, embedd_dim), name='resh_C')(drop_cx)

    czcnn = TimeDistributed(Conv1D(opts.nbfilters,
                                   opts.filter1_len,
                                   padding='valid'),
                            name='czcnn')(resh_C)

    x = emb(word_input)
    x_maskedout = ZeroMaskedEntries(name='x_maskedout')(x)
    drop_x = Dropout(opts.dropout, name='drop_x')(x_maskedout)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    # add char-based CNN, concatenating with word embedding to compose word representation
    zcnn = TimeDistributed(Conv1D(opts.nbfilters,
                                  opts.filter1_len,
                                  padding='valid'),
                           name='zcnn')(resh_W)
    '''
    encoded_essay = Reshape((zcnn.shape[1].value*zcnn.shape[2].value, opts.nbfilters))(zcnn)
    encoded_context = Reshape((czcnn.shape[1].value*czcnn.shape[2].value, opts.nbfilters))(czcnn)
    # bidaf
    # Now we compute a similarity between the passage words and the question words, and
    # normalize the matrix in a couple of different ways for input into some more layers.
    matrix_attention_layer = MatrixAttention(name='essay_context_similarity')
    # matrix_attention_layer = LinearMatrixAttention(name='passage_question_similarity')

    # Shape: (batch_size, num_passage_words, num_question_words)
    essay_context_similarity = matrix_attention_layer([encoded_essay, encoded_context])


    # Shape: (batch_size, num_passage_words, num_question_words), normalized over question
    # words for each passage word.
    essay_context_attention = MaskedSoftmax()(essay_context_similarity)
    # Shape: (batch_size, num_passage_words, embedding_dim * 2)
    weighted_sum_layer = WeightedSum(name="essay_context_vectors", use_masking=False)
    essay_context_vectors = weighted_sum_layer([encoded_context, essay_context_attention])

    
    # Min's paper finds, for each document word, the most similar question word to it, and
    # computes a single attention over the whole document using these max similarities.
    # Shape: (batch_size, num_passage_words)
    context_essay_similarity = Max(axis=-1)(essay_context_similarity)
    # Shape: (batch_size, num_passage_words)
    context_essay_attention = MaskedSoftmax()(context_essay_similarity)
    # Shape: (batch_size, embedding_dim * 2)
    weighted_sum_layer = WeightedSum(name="question_passage_vector", use_masking=False)
    context_essay_vector = weighted_sum_layer([encoded_essay, context_essay_attention])

    # Then he repeats this question/passage vector for every word in the passage, and uses it
    # as an additional input to the hidden layers above.
    repeat_layer = RepeatLike(axis=1, copy_from_axis=1)
    # Shape: (batch_size, num_passage_words, embedding_dim * 2)
    tiled_context_essay_vector = repeat_layer([context_essay_vector, encoded_essay])

    complex_concat_layer = ComplexConcat(combination='1*2,1*3', name='final_merged_passage')
    final_merged_passage = complex_concat_layer([encoded_essay,
                                                 essay_context_vectors,
                                                 tiled_context_essay_vector])
    

    complex_concat_layer = ComplexConcat(combination='1*2', name='final_merged_passage')
    final_merged_passage = complex_concat_layer([encoded_essay,
                                                 essay_context_vectors])


    mcnn = Reshape((zcnn.shape[1].value, zcnn.shape[2].value, opts.nbfilters), name='mcnn')(final_merged_passage)
    '''

    # pooling mode
    if opts.mode == 'mot':
        logger.info("Use mean-over-time pooling on sentence")
        avg_zcnn = TimeDistributed(GlobalAveragePooling1D(),
                                   name='avg_zcnn')(zcnn)
    elif opts.mode == 'att':
        logger.info('Use attention-pooling on sentence')
        avg_zcnn = TimeDistributed(Attention(), name='avg_zcnn')(zcnn)
        avg_czcnn = TimeDistributed(Attention(), name='avg_czcnn')(czcnn)
    elif opts.mode == 'merged':
        logger.info(
            'Use mean-over-time and attention-pooling together on sentence')
        avg_zcnn1 = TimeDistributed(GlobalAveragePooling1D(),
                                    name='avg_zcnn1')(zcnn)
        avg_zcnn2 = TimeDistributed(Attention(), name='avg_zcnn2')(zcnn)
        avg_zcnn = concatenate([avg_zcnn1, avg_zcnn2], name='avg_zcnn')
    else:
        raise NotImplementedError

    hz_lstm = LSTM(opts.lstm_units, return_sequences=True,
                   name='hz_lstm')(avg_zcnn)
    chz_lstm = LSTM(opts.lstm_units, return_sequences=True,
                    name='chz_lstm')(avg_czcnn)

    if opts.mode == 'mot':
        logger.info('Use mean-over-time pooling on text')
        avg_hz_lstm = GlobalAveragePooling1D(name='avg_hz_lstm')(hz_lstm)
    elif opts.mode == 'att':
        logger.info('Use co-attention on text')

        # PART 2:
        # Now we compute a similarity between the passage words and the question words, and
        # normalize the matrix in a couple of different ways for input into some more layers.
        matrix_attention_layer = MatrixAttention(
            name='essay_context_similarity')
        # Shape: (batch_size, num_passage_words, num_question_words)
        essay_context_similarity = matrix_attention_layer([hz_lstm, chz_lstm])

        # Shape: (batch_size, num_passage_words, num_question_words), normalized over question
        # words for each passage word.
        essay_context_attention = MaskedSoftmax()(essay_context_similarity)
        weighted_sum_layer = WeightedSum(name="essay_context_vectors",
                                         use_masking=False)
        # Shape: (batch_size, num_passage_words, embedding_dim * 2)
        weighted_hz_lstm = weighted_sum_layer(
            [chz_lstm, essay_context_attention])

        # Min's paper finds, for each document word, the most similar question word to it, and
        # computes a single attention over the whole document using these max similarities.
        # Shape: (batch_size, num_passage_words)
        context_essay_similarity = Max(axis=-1)(essay_context_similarity)
        # Shape: (batch_size, num_passage_words)
        context_essay_attention = MaskedSoftmax()(context_essay_similarity)
        # Shape: (batch_size, embedding_dim * 2)
        weighted_sum_layer = WeightedSum(name="context_essay_vector",
                                         use_masking=False)
        context_essay_vector = weighted_sum_layer(
            [hz_lstm, context_essay_attention])

        # Then he repeats this question/passage vector for every word in the passage, and uses it
        # as an additional input to the hidden layers above.
        repeat_layer = RepeatLike(axis=1, copy_from_axis=1)
        # Shape: (batch_size, num_passage_words, embedding_dim * 2)
        tiled_context_essay_vector = repeat_layer(
            [context_essay_vector, hz_lstm])

        complex_concat_layer = ComplexConcat(combination='1,2,1*2,1*3',
                                             name='final_merged_passage')
        final_merged_passage = complex_concat_layer(
            [hz_lstm, weighted_hz_lstm, tiled_context_essay_vector])

        avg_hz_lstm = LSTM(opts.lstm_units,
                           return_sequences=False,
                           name='avg_hz_lstm')(final_merged_passage)

        # avg_hz_lstm = CoAttentionWithoutBi(name='avg_hz_lstm')([hz_lstm, weighted_hz_lstm])

        # avg_hz_lstm = Attention(name='avg_hz_lstm')(hz_lstm)
    elif opts.mode == 'merged':
        logger.info(
            'Use mean-over-time and attention-pooling together on text')
        avg_hz_lstm1 = GlobalAveragePooling1D(name='avg_hz_lstm1')(hz_lstm)
        avg_hz_lstm2 = Attention(name='avg_hz_lstm2')(hz_lstm)
        avg_hz_lstm = concatenate([avg_hz_lstm1, avg_hz_lstm2], name='avg_hz_lstm')
    else:
        raise NotImplementedError
    if opts.l2_value:
        logger.info("Use l2 regularizers, l2 value = %s" % opts.l2_value)
        y = Dense(units=1,
                  activation='sigmoid',
                  name='output',
                  W_regularizer=l2(opts.l2_value))(avg_hz_lstm)
    else:
        y = Dense(units=1, activation='sigmoid', name='output')(avg_hz_lstm)

    model = Model(inputs=[word_input, context_input], outputs=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) -
                      np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
Example #9
def build_hrcnn_model(opts,
                      vocab_size=0,
                      maxnum=50,
                      maxlen=50,
                      embedd_dim=50,
                      embedding_weights=None,
                      verbose=False,
                      init_mean_value=None):
    # LSTM stacked over CNN based on sentence level
    N = maxnum
    L = maxlen
    print(opts)
    logger.info(
        "Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, nbfilters = %s, filter1_len = %s, drop rate = %s"
        % (N, L, embedd_dim, opts.nbfilters, opts.filter1_len, opts.dropout))

    word_input = Input(shape=(N * L, ), dtype='int32', name='word_input')

    # embedding layer
    if opts.use_mask == 0:
        x = Embedding(output_dim=embedd_dim,
                      input_dim=vocab_size,
                      input_length=N * L,
                      weights=embedding_weights,
                      mask_zero=False,
                      name='x')(word_input)
        x_maskedout = x

    elif opts.use_mask == 1:
        x = Embedding(output_dim=embedd_dim,
                      input_dim=vocab_size,
                      input_length=N * L,
                      weights=embedding_weights,
                      mask_zero=True,
                      name='x')(word_input)
        x_maskedout = ZeroMaskedEntries(name='x_maskedout')(x)

    # drop out
    drop_x = Dropout(opts.dropout, name='drop_x')(x_maskedout)
    # reshape
    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)
    # CNN layer
    zcnn = TimeDistributed(Convolution1D(opts.nbfilters,
                                         opts.filter1_len,
                                         border_mode='valid'),
                           name='zcnn')(resh_W)

    # pooling mode1 on CNN
    if opts.mode1 == 'mot':
        logger.info("Use mean-over-time pooling on sentence")
        avg_zcnn = TimeDistributed(GlobalAveragePooling1D(),
                                   name='avg_zcnn')(zcnn)
    elif opts.mode1 == 'att':
        logger.info('Use attention-pooling on sentence')
        avg_zcnn = TimeDistributed(Attention(), name='avg_zcnn')(zcnn)
    elif opts.mode1 == 'merged':
        logger.info(
            'Use mean-over-time and attention-pooling together on sentence')
        avg_zcnn1 = TimeDistributed(GlobalAveragePooling1D(),
                                    input_shape=(K.int_shape(zcnn)[2],
                                                 K.int_shape(zcnn)[3]),
                                    name='avg_zcnn1')(zcnn)
        avg_zcnn2 = TimeDistributed(Attention(), name='avg_zcnn2')(zcnn)
        avg_zcnn = concatenate([avg_zcnn1, avg_zcnn2], name='avg_zcnn')
    else:
        raise NotImplementedError

    hz_lstm = LSTM(opts.lstm_units, return_sequences=True,
                   name='hz_lstm')(avg_zcnn)

    # pooling mode1 on LSTM
    if opts.mode2 == 'mot':
        logger.info('Use mean-over-time pooling on text')
        avg_hz_lstm = GlobalAveragePooling1D(name='avg_hz_lstm')(hz_lstm)
    elif opts.mode2 == 'att':
        logger.info('Use attention-pooling on text')
        avg_hz_lstm = Attention(name='avg_hz_lstm')(hz_lstm)
    elif opts.mode2 == 'merged':
        logger.info(
            'Use mean-over-time and attention-pooling together on text')
        avg_hz_lstm1 = GlobalAveragePooling1D(name='avg_hz_lstm1')(hz_lstm)
        avg_hz_lstm2 = Attention(name='avg_hz_lstm2')(hz_lstm)
        avg_hz_lstm = concatenate([avg_hz_lstm1, avg_hz_lstm2], name='avg_hz_lstm')
    else:
        raise NotImplementedError

    # l2 regularization
    if opts.l2_value:
        logger.info("Use l2 regularizers, l2 value = %s" % opts.l2_value)
        y = Dense(output_dim=1,
                  activation='sigmoid',
                  name='output',
                  kernel_regularizer=regularizers.l2(
                      opts.l2_value))(avg_hz_lstm)
    else:
        y = Dense(output_dim=1, activation='sigmoid',
                  name='output')(avg_hz_lstm)

    model = Model(input=word_input, output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) -
                      np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].bias = bias_value

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model