# Keras 2.x imports for the layers used below (this repo assumes GPU-enabled
# Keras, since the recurrent layers are the CuDNN variants).
from keras import backend as K
from keras import optimizers
from keras.layers import (Average, BatchNormalization, Bidirectional,
                          Concatenate, CuDNNGRU, CuDNNLSTM, Dense, Dropout,
                          Embedding, GlobalAvgPool1D, GlobalMaxPool1D, Input,
                          Lambda, SpatialDropout1D, TimeDistributed)
from keras.models import Model

# Project-local building blocks referenced below but defined elsewhere in the
# repo: Highway, SelfAttention, soft_attention_alignment, interaction,
# submult, time_distributed, apply_multiple, the f1 metric, and the CARNN_*
# hyperparameter constants.


def carnn(embedding_matrix, config,
          compare_out_size=CARNN_COMPARE_LAYER_OUTSIZE,
          rnn_size=CARNN_RNN_SIZE,
          rnn_dropout=CARNN_AGGREATION_DROPOUT):
    """CARNN: highway encoding plus self- and cross-attention compare,
    GRU aggregation, and a symmetric dense head over pooled features."""
    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input(shape=(config['max_length'], ), dtype='int32', name='q2_input')
    activation = 'elu'
    compare_dim = 500
    compare_dropout = 0.2

    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix],
                                # mask_zero=True
                                )
    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # (batch, max_length, emb_dim)
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    # Encode: shared highway encoder, plus per-question self-attention.
    highway_encoder = TimeDistributed(Highway(activation='relu'))
    self_attention = SelfAttention(d_model=embedding_matrix.shape[1])
    q1_encoded = highway_encoder(q1_embed)
    q2_encoded = highway_encoder(q2_embed)
    s1_encoded = self_attention(q1, q1_encoded)
    s2_encoded = self_attention(q2, q2_encoded)

    # Attention: soft alignment between the two encoded questions.
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compare: both concatenation orders are kept so the compare layers see a
    # symmetric view of each pair; their outputs are averaged below.
    q1_combined1 = Concatenate()(
        [q1_encoded, q2_aligned, interaction(q1_encoded, q2_aligned)])
    q1_combined2 = Concatenate()(
        [q2_aligned, q1_encoded, interaction(q1_encoded, q2_aligned)])
    q2_combined1 = Concatenate()(
        [q2_encoded, q1_aligned, interaction(q2_encoded, q1_aligned)])
    q2_combined2 = Concatenate()(
        [q1_aligned, q2_encoded, interaction(q2_encoded, q1_aligned)])
    s1_combined1 = Concatenate()(
        [q1_encoded, s1_encoded, interaction(q1_encoded, s1_encoded)])
    s1_combined2 = Concatenate()(
        [s1_encoded, q1_encoded, interaction(q1_encoded, s1_encoded)])
    s2_combined1 = Concatenate()(
        [q2_encoded, s2_encoded, interaction(q2_encoded, s2_encoded)])
    s2_combined2 = Concatenate()(
        [s2_encoded, q2_encoded, interaction(q2_encoded, s2_encoded)])

    # Two independent compare stacks: one for the cross-attention branch (d),
    # one for the self-attention branch (g).
    compare_layers_d = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_out_size, activation=activation),
        Dropout(compare_dropout),
    ]
    compare_layers_g = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_out_size, activation=activation),
        Dropout(compare_dropout),
    ]

    # NOTE these can be optimized
    q1_compare1 = time_distributed(q1_combined1, compare_layers_d)
    q1_compare2 = time_distributed(q1_combined2, compare_layers_d)
    q1_compare = Average()([q1_compare1, q1_compare2])
    q2_compare1 = time_distributed(q2_combined1, compare_layers_d)
    q2_compare2 = time_distributed(q2_combined2, compare_layers_d)
    q2_compare = Average()([q2_compare1, q2_compare2])
    s1_compare1 = time_distributed(s1_combined1, compare_layers_g)
    s1_compare2 = time_distributed(s1_combined2, compare_layers_g)
    s1_compare = Average()([s1_compare1, s1_compare2])
    s2_compare1 = time_distributed(s2_combined1, compare_layers_g)
    s2_compare2 = time_distributed(s2_combined2, compare_layers_g)
    s2_compare = Average()([s2_compare1, s2_compare2])

    # Aggregate: shared GRU over the concatenated encode/compare features.
    q1_encoded = Concatenate()([q1_encoded, q1_compare, s1_compare])
    q2_encoded = Concatenate()([q2_encoded, q2_compare, s2_compare])
    aggregate_rnn = CuDNNGRU(rnn_size, return_sequences=True)
    q1_aggregated = aggregate_rnn(q1_encoded)
    q1_aggregated = Dropout(rnn_dropout)(q1_aggregated)
    q2_aggregated = aggregate_rnn(q2_encoded)
    q2_aggregated = Dropout(rnn_dropout)(q2_aggregated)

    # Pooling
    q1_rep = apply_multiple(q1_aggregated,
                            [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep = apply_multiple(q2_aggregated,
                            [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q_diff = Lambda(lambda x: K.abs(x[0] - x[1]))([q1_rep, q2_rep])
    q_multi = Lambda(lambda x: x[0] * x[1])([q1_rep, q2_rep])

    # Hand-crafted pair features enter through a separate dense branch.
    feature_input = Input(shape=(config['feature_length'], ))
    feature_dense = BatchNormalization()(feature_input)
    feature_dense = Dense(config['dense_dim'],
                          activation='relu')(feature_dense)

    # Symmetric head: both question orders are scored and averaged.
    h_all1 = Concatenate()([q1_rep, q2_rep, q_diff, q_multi, feature_dense])
    h_all2 = Concatenate()([q2_rep, q1_rep, q_diff, q_multi, feature_dense])
    h_all1 = Dropout(0.5)(h_all1)
    h_all2 = Dropout(0.5)(h_all2)
    dense = Dense(256, activation='relu')
    h_all1 = dense(h_all1)
    h_all2 = dense(h_all2)
    h_all = Average()([h_all1, h_all2])
    predictions = Dense(1, activation='sigmoid')(h_all)

    model = Model(inputs=[q1, q2, feature_input], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
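# --- Usage sketch (illustrative, not part of the original pipeline) ---
# A minimal sketch of how carnn is wired up, assuming hypothetical config
# values; the real keys live in this project's config. Building the graph
# needs the project-local layers above plus a CuDNN-capable GPU, so call this
# manually rather than at import time.
def _demo_carnn():
    import numpy as np
    demo_config = {
        'max_length': 30,              # tokens per question
        'embed_trainable': False,
        'spatial_dropout_rate': 0.2,
        'feature_length': 10,          # hand-crafted pair features
        'dense_dim': 256,
        'optimizer': 'adam',
        'learning_rate': 1e-3,
    }
    # Random stand-in for pretrained vectors: vocab 5000, dim 300.
    demo_embeddings = np.random.normal(size=(5000, 300)).astype('float32')
    model = carnn(demo_embeddings, demo_config)
    model.summary()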
def esim_word_char(embedding_matrix, char_embedding_matrix, config):
    """ESIM with parallel word-level and char-level branches, merged with
    hand-crafted features in the classifier head."""
    if config['rnn'] == 'gru' and config['gpu']:
        word_encode = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        word_compose = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        char_encode = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        char_compose = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
    else:
        # Fallback is still a CuDNN layer, so a GPU is required either way.
        word_encode = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        word_compose = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        char_encode = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        char_compose = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))

    # Word-level branch.
    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input(shape=(config['max_length'], ), dtype='int32', name='q2_input')
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix],
                                # mask_zero=True
                                )
    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # (batch, max_length, emb_dim)
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    q1_encoded = word_encode(q1_embed)
    q2_encoded = word_encode(q2_embed)
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned, submult(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned, submult(q2_encoded, q1_aligned)])
    # q1_combined = Dropout(self.config['dense_dropout'])(q1_combined)
    # q2_combined = Dropout(self.config['dense_dropout'])(q2_combined)
    q1_compare = word_compose(q1_combined)
    q2_compare = word_compose(q2_combined)

    # Aggregate
    q1_rep = apply_multiple(q1_compare,
                            [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep = apply_multiple(q2_compare,
                            [GlobalAvgPool1D(), GlobalMaxPool1D()])
    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([q1_rep, q2_rep])
    mul_rep = Lambda(lambda x: x[0] * x[1])([q1_rep, q2_rep])

    # Char-level branch, mirroring the word-level pipeline.
    q1_char = Input(shape=(config['char_max_length'], ),
                    dtype='int32', name='q1_char_input')
    q2_char = Input(shape=(config['char_max_length'], ),
                    dtype='int32', name='q2_char_input')
    char_embedding_layer = Embedding(char_embedding_matrix.shape[0],
                                     char_embedding_matrix.shape[1],
                                     trainable=config['embed_trainable'],
                                     weights=[char_embedding_matrix],
                                     # mask_zero=True
                                     )
    q1_embed_char = char_embedding_layer(q1_char)
    q2_embed_char = char_embedding_layer(q2_char)
    q1_embed_char = BatchNormalization(axis=2)(q1_embed_char)
    q2_embed_char = BatchNormalization(axis=2)(q2_embed_char)
    q1_embed_char = SpatialDropout1D(
        config['spatial_dropout_rate'])(q1_embed_char)
    q2_embed_char = SpatialDropout1D(
        config['spatial_dropout_rate'])(q2_embed_char)

    q1_encoded_char = char_encode(q1_embed_char)
    q2_encoded_char = char_encode(q2_embed_char)
    q1_aligned_char, q2_aligned_char = soft_attention_alignment(
        q1_encoded_char, q2_encoded_char)
    q1_combined_char = Concatenate()([
        q1_encoded_char, q2_aligned_char,
        submult(q1_encoded_char, q2_aligned_char)
    ])
    q2_combined_char = Concatenate()([
        q2_encoded_char, q1_aligned_char,
        submult(q2_encoded_char, q1_aligned_char)
    ])
    q1_compare_char = char_compose(q1_combined_char)
    q2_compare_char = char_compose(q2_combined_char)

    # Aggregate
    q1_rep_char = apply_multiple(
        q1_compare_char, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep_char = apply_multiple(
        q2_compare_char, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    sub_rep_char = Lambda(lambda x: K.abs(x[0] - x[1]))(
        [q1_rep_char, q2_rep_char])
    mul_rep_char = Lambda(lambda x: x[0] * x[1])([q1_rep_char, q2_rep_char])

    # Classifier
    merged = Concatenate()([q1_rep, q2_rep, sub_rep, mul_rep])
    merged_char = Concatenate()(
        [q1_rep_char, q2_rep_char, sub_rep_char, mul_rep_char])
    dense = BatchNormalization()(merged)
    dense = Dense(config['dense_dim'], activation='elu')(dense)
    dense_char = BatchNormalization()(merged_char)
    dense_char = Dense(config['dense_dim'], activation='elu')(dense_char)

    feature_input = Input(shape=(config['feature_length'], ))
    feature_dense = BatchNormalization()(feature_input)
    feature_dense = Dense(config['dense_dim'],
                          activation='relu')(feature_dense)

    dense = Concatenate()([dense, dense_char, feature_dense])
    dense = BatchNormalization()(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = Dense(config['dense_dim'], activation='elu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    predictions = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, q1_char, q2_char, feature_input],
                  outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
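# --- Usage sketch (illustrative, not part of the original pipeline) ---
# A minimal sketch for the word+char ESIM, assuming hypothetical config
# values; the real keys live in this project's config. Both branches of the
# rnn switch use CuDNN layers, so a GPU is required either way.
def _demo_esim_word_char():
    import numpy as np
    demo_config = {
        'rnn': 'gru',
        'gpu': True,
        'rnn_output_size': 128,
        'max_length': 30,
        'char_max_length': 50,
        'embed_trainable': False,
        'spatial_dropout_rate': 0.2,
        'feature_length': 10,
        'dense_dim': 256,
        'dense_dropout': 0.5,
        'optimizer': 'adam',
        'learning_rate': 1e-3,
    }
    demo_word_emb = np.random.normal(size=(5000, 300)).astype('float32')
    demo_char_emb = np.random.normal(size=(2000, 100)).astype('float32')
    model = esim_word_char(demo_word_emb, demo_char_emb, demo_config)
    model.summary()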
def decom(embedding_matrix, config):
    """Decomposable attention: highway encoding, soft alignment, dense
    compare layers, and pooled aggregation into a dense classifier."""
    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input(shape=(config['max_length'], ), dtype='int32', name='q2_input')
    activation = 'elu'
    compare_dim = 500  # 300
    compare_dropout = 0.2
    # Unused projection settings, kept from an earlier variant:
    projection_hidden = 300
    projection_dropout = 0.2
    projection_dim = 300

    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix],
                                # mask_zero=True
                                )
    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # (batch, max_length, emb_dim)
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    highway_encoder = TimeDistributed(Highway(activation='relu'))
    q1_encoded = highway_encoder(q1_embed)
    q2_encoded = highway_encoder(q2_embed)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compare
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned, interaction(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned, interaction(q2_encoded, q1_aligned)])
    compare_layers = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
    ]
    q1_compare = time_distributed(q1_combined, compare_layers)
    q2_compare = time_distributed(q2_combined, compare_layers)

    # Aggregate
    q1_rep = apply_multiple(q1_compare,
                            [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep = apply_multiple(q2_compare,
                            [GlobalAvgPool1D(), GlobalMaxPool1D()])
    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([q1_rep, q2_rep])
    mul_rep = Lambda(lambda x: x[0] * x[1])([q1_rep, q2_rep])

    # Dense meta features
    # meta_densed = BatchNormalization()(meta_features)
    # meta_densed = Highway(activation='relu')(meta_densed)
    # meta_densed = Dropout(0.2)(meta_densed)

    # Classifier
    merged = Concatenate()([q1_rep, q2_rep, sub_rep, mul_rep])
    dense = BatchNormalization()(merged)
    dense = Dense(config['dense_dim'], activation='elu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = Dense(config['dense_dim'], activation='elu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    predictions = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
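# --- Usage sketch (illustrative, not part of the original pipeline) ---
# A minimal sketch for decom, assuming hypothetical config values. Unlike
# carnn and esim_word_char, decom takes only the two token-id inputs, so a
# dummy forward pass is easy to sanity-check.
def _demo_decom():
    import numpy as np
    demo_config = {
        'max_length': 30,
        'embed_trainable': False,
        'spatial_dropout_rate': 0.2,
        'dense_dim': 256,
        'dense_dropout': 0.5,
        'optimizer': 'adam',
        'learning_rate': 1e-3,
    }
    demo_embeddings = np.random.normal(size=(5000, 300)).astype('float32')
    model = decom(demo_embeddings, demo_config)
    # Random token ids, batch of 8; output should be (8, 1) probabilities.
    q1_ids = np.random.randint(0, 5000, size=(8, 30)).astype('int32')
    q2_ids = np.random.randint(0, 5000, size=(8, 30)).astype('int32')
    print(model.predict([q1_ids, q2_ids]).shape)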