예제 #1
0
def create_model():
    left_input = Input(shape=(max_seq_length, ), dtype='float32')
    right_input = Input(shape=(max_seq_length, ), dtype='float32')
    left_sen_representation = shared_model(left_input)
    right_sen_representation = shared_model(right_input)

    man_distance = ManDist()(
        [left_sen_representation, right_sen_representation])
    sen_representation = concatenate(
        [left_sen_representation, right_sen_representation, man_distance])
    similarity = Dense(1, activation='sigmoid')(Dense(2)(Dense(4)(
        Dense(16)(sen_representation))))
    model = Model(inputs=[left_input, right_input], outputs=[similarity])

    return model
if __name__ == '__main__':

    # 超参
    batch_size = 1024
    n_epoch = 9
    n_hidden = 50

    left_input = Input(shape=(max_seq_length,), dtype='float32')
    right_input = Input(shape=(max_seq_length,), dtype='float32')
    left_sen_representation = shared_model(left_input)
    right_sen_representation = shared_model(right_input)

    # 引入曼哈顿距离,把得到的变换concat上原始的向量再通过一个多层的DNN做了下非线性变换、sigmoid得相似度
    # 没有使用https://zhuanlan.zhihu.com/p/31638132中提到的马氏距离,尝试了曼哈顿距离、点乘和cos,效果曼哈顿最好
    man_distance = ManDist()([left_sen_representation, right_sen_representation])
    sen_representation = concatenate([left_sen_representation, right_sen_representation, man_distance])
    similarity = Dense(1, activation='sigmoid')(Dense(2)(Dense(4)(Dense(16)(sen_representation))))
    model = Model(inputs=[left_input, right_input], outputs=[similarity])

    model.compile(loss='mean_squared_error', optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
    model.summary()

    training_start_time = time()
    malstm_trained = model.fit([X_train['left'], X_train['right']], Y_train,
                               batch_size=batch_size, epochs=n_epoch,
                               validation_data=([X_validation['left'], X_validation['right']], Y_validation))
    training_end_time = time()
    print("Training time finished.\n%d epochs in %12.2f" % (n_epoch, training_end_time - training_start_time))

    # Plot accuracy
예제 #3
0
# x.add(GlobalMaxPool1D())
# x.add(Dense(250, activation='relu'))
# x.add(Dropout(0.3))
# x.add(Dense(1, activation='sigmoid'))
# LSTM
x.add(LSTM(n_hidden))

shared_model = x

# The visible layer
left_input = Input(shape=(max_seq_length, ), dtype='int32')
right_input = Input(shape=(max_seq_length, ), dtype='int32')

# Pack it all up into a Manhattan Distance model
malstm_distance = ManDist()(
    [shared_model(left_input),
     shared_model(right_input)])
model = Model(inputs=[left_input, right_input], outputs=[malstm_distance])

model.compile(loss='mean_squared_error',
              optimizer=tf.keras.optimizers.Adam(),
              metrics=['accuracy'])
model.summary()
shared_model.summary()

try:
    # Start trainings
    training_start_time = time()
    callbacks = [EarlyStopping(monitor='val_loss', patience=3)]
    malstm_trained = model.fit(
        [X_train['left'], X_train['right']],
예제 #4
0
                            embedding_dim,
                            weights=[embeddings],
                            input_length=max_seq_length,
                            trainable=False)

# Embedded version of the inputs
encoded_left = embedding_layer(left_input)
encoded_right = embedding_layer(right_input)

# Since this is a siamese network, both sides share the same LSTM
shared_lstm = LSTM(50)

left_output = shared_lstm(encoded_left)
right_output = shared_lstm(encoded_right)

malstm_distance = ManDist()([left_output, right_output])

# Pack it all up into a model
model = Model(inputs=[left_input, right_input], outputs=[malstm_distance])

if gpus >= 2:
    # `multi_gpu_model()` is a so quite buggy. it breaks the saved model.
    model = tf.keras.utils.multi_gpu_model(model, gpus=gpus)
model.compile(loss='mean_squared_error',
              optimizer=tf.keras.optimizers.Adam(),
              metrics=['accuracy'])
model.summary()

# Start trainings
training_start_time = time()
malstm_trained = model.fit(
예제 #5
0
# x.add(Conv1D(250, kernel_size=5, activation='relu'))
# x.add(GlobalMaxPool1D())
# x.add(Dense(250, activation='relu'))
# x.add(Dropout(0.3))
# x.add(Dense(1, activation='sigmoid'))
# LSTM
x.add(LSTM(n_hidden))

shared_model = x

# The visible layer
left_input = Input(shape=(max_seq_length, ), dtype='int32')
right_input = Input(shape=(max_seq_length, ), dtype='int32')

# Pack it all up into a Manhattan Distance model
malstm_distance = ManDist()([x(left_input), x(right_input)])
model = Model(inputs=[left_input, right_input], outputs=[malstm_distance])

if gpus >= 2:
    # `multi_gpu_model()` is a so quite buggy. it breaks the saved model.
    model = tf.keras.utils.multi_gpu_model(model, gpus=gpus)
model.compile(loss='mean_squared_error',
              optimizer=tf.keras.optimizers.Adam(),
              metrics=['accuracy'])
model.summary()
shared_model.summary()

# Start trainings
training_start_time = time()
malstm_trained = model.fit(
    [X_train['left'], X_train['right']],