def create_critic_network(self, S, G=None):
    if self.args['--network'] == '1':
        l1 = concatenate([S, G])
        l2 = Dense(400, activation="relu")(l1)
        l3 = concatenate([l2, G])
        l4 = Dense(300, activation="relu")(l3)
        Q_values = Dense(self.num_actions)(l4)
    elif self.args['--network'] == '2':
        l1 = subtract([S, G])
        l2 = concatenate([l1, S])
        l3 = Dense(400, activation="relu")(l2)
        l4 = Dense(300, activation="relu")(l3)
        Q_values = Dense(self.num_actions)(l4)
    elif self.args['--network'] == '3':
        shared_l = Dense(200, activation='relu')
        l1 = shared_l(S)
        l2 = shared_l(G)
        l3 = subtract([l1, l2])
        l4 = Dense(200, activation="relu")(l3)
        l5 = Dense(300, activation="relu")(l4)
        Q_values = Dense(self.num_actions)(l5)
    else:
        l1 = concatenate([S, G])
        l2 = Dense(400, activation="relu")(l1)
        l3 = Dense(300, activation="relu")(l2)
        Q_values = Dense(self.num_actions)(l3)
    return Q_values

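# A minimal, self-contained sketch of how a goal-conditioned critic like
# variant '2' above might be wired up and inspected outside the class.
# The state/goal size (8), the action count (4), and the Keras import paths
# are assumptions for illustration, not taken from the original code.
from keras.layers import Input, Dense, subtract, concatenate
from keras.models import Model

state_in = Input(shape=(8,))  # hypothetical state size
goal_in = Input(shape=(8,))   # goal must match the state size for subtract()

diff = subtract([state_in, goal_in])  # variant '2': feed S - G ...
h = Dense(400, activation="relu")(concatenate([diff, state_in]))  # ... alongside S
h = Dense(300, activation="relu")(h)
q_values = Dense(4)(h)  # hypothetical action count

critic = Model([state_in, goal_in], q_values)
critic.summary()
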
def create_critic_network(self, S, G=None, M=None):
    if self.network == '0':
        L1 = concatenate([multiply([subtract([S, G]), M]), S])
        L2 = Dense(400, activation="relu",
                   kernel_initializer=lecun_uniform(),
                   kernel_regularizer=l2(0.01))(L1)
        L3 = Dense(300, activation="relu",
                   kernel_initializer=lecun_uniform(),
                   kernel_regularizer=l2(0.01))(L2)
        Q_values = Dense(self.env.action_dim, activation='linear',
                         kernel_initializer=RandomUniform(minval=-3e-4, maxval=3e-4),
                         kernel_regularizer=l2(0.01),
                         bias_initializer=RandomUniform(minval=-3e-4, maxval=3e-4))(L3)
    else:
        L1 = Dense(200, activation="relu",
                   kernel_initializer=lecun_uniform(),
                   kernel_regularizer=l2(0.01))
        L2 = Dense(300, activation="relu",
                   kernel_initializer=lecun_uniform(),
                   kernel_regularizer=l2(0.01))
        i1 = multiply([subtract([S, G]), M])
        i2 = S
        h1 = L1(i1)
        h2 = L1(i2)
        h3 = concatenate([h1, h2])
        h4 = L2(h3)
        Q_values = Dense(self.env.action_dim, activation='linear',
                         kernel_initializer=RandomUniform(minval=-3e-4, maxval=3e-4),
                         kernel_regularizer=l2(0.01),
                         bias_initializer=RandomUniform(minval=-3e-4, maxval=3e-4))(h4)
    return Q_values

def create_actor_network(self, S, G=None, M=None):
    inputs = concatenate([multiply([subtract([S, G]), M]), S])
    h0 = Dense(400, activation="relu", kernel_initializer=lecun_uniform())(inputs)
    h1 = Dense(300, activation="relu", kernel_initializer=lecun_uniform())(h0)
    V = Dense(self.a_dim[0], activation="tanh",
              kernel_initializer=RandomUniform(minval=-3e-3, maxval=3e-3),
              bias_initializer=RandomUniform(minval=-3e-3, maxval=3e-3))(h1)
    return V

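# The input term multiply([subtract([S, G]), M]) used by the actor and critic
# above masks the state-goal difference so only the goal-relevant coordinates
# survive before being concatenated with the raw state. A NumPy sketch of
# that input construction, with made-up values:
import numpy as np

S = np.array([0.2, 0.5, 0.8])   # state
G = np.array([0.0, 1.0, 0.0])   # goal
M = np.array([0.0, 1.0, 0.0])   # mask selecting the active goal dimension
masked_diff = (S - G) * M       # [ 0. , -0.5,  0. ]
net_input = np.concatenate([masked_diff, S])
print(net_input)
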
def initModels(self):
    # critic inputs and TD-error training function
    S_c = Input(shape=self.s_dim)
    A_c = Input(shape=self.a_dim)
    G_c = Input(shape=self.g_dim)
    M_c = Input(shape=self.g_dim)
    TARGETS = Input(shape=(1,))
    layers, qval = self.create_critic_network(S_c, A_c, G_c, M_c)
    self.qvalModel = Model([S_c, A_c, G_c, M_c], qval)
    loss_dqn = K.mean(K.square(qval - TARGETS), axis=0)
    inputs = [S_c, A_c, G_c, M_c, TARGETS]
    outputs = [loss_dqn, qval]
    self.updatesQval = Adam(lr=0.001).get_updates(
        params=self.qvalModel.trainable_weights, loss=loss_dqn)
    self.trainQval = K.function(inputs=inputs, outputs=outputs,
                                updates=self.updatesQval)

    # actor
    S_a = Input(shape=self.s_dim)
    G_a = Input(shape=self.g_dim)
    M_a = Input(shape=self.g_dim)
    action = self.create_actor_network(S_a, G_a, M_a)
    self.actionModel = Model([S_a, G_a, M_a], action)
    self.action = K.function(inputs=[S_a, G_a, M_a], outputs=[action],
                             updates=None)

    # rebuild the critic graph on top of the actor output, reusing the
    # critic's layers, to get dQ/da
    L1, L2, L3 = layers
    qvalTrain = L1(concatenate([multiply([subtract([S_a, G_a]), M_a]), S_a]))
    qvalTrain = concatenate([qvalTrain, action])
    qvalTrain = L2(qvalTrain)
    qvalTrain = L3(qvalTrain)
    self.criticActionGrads = K.gradients(qvalTrain, action)[0]

    # scale each action-gradient by the remaining headroom to its bound, so
    # gradients that push actions toward a bound are damped ("inverted")
    low = tf.convert_to_tensor(self.env.action_space.low)
    high = tf.convert_to_tensor(self.env.action_space.high)
    width = high - low
    pos = K.cast(K.greater_equal(self.criticActionGrads, 0), dtype='float32')
    pos *= high - action
    neg = K.cast(K.less(self.criticActionGrads, 0), dtype='float32')
    neg *= action - low
    inversion = (pos + neg) / width
    self.invertedCriticActionGrads = self.criticActionGrads * inversion

    if self.inv_grads == '0':
        self.actorGrads = tf.gradients(action,
                                       self.actionModel.trainable_weights,
                                       grad_ys=-self.criticActionGrads)
    else:
        self.actorGrads = tf.gradients(action,
                                       self.actionModel.trainable_weights,
                                       grad_ys=-self.invertedCriticActionGrads)
    self.updatesActor = DDPGAdam(lr=0.0001).get_updates(
        params=self.actionModel.trainable_weights, loss=None,
        grads=self.actorGrads)
    inputs = [S_a, G_a, M_a]
    outputs = []
    self.trainActor = K.function(inputs=inputs, outputs=outputs,
                                 updates=self.updatesActor)

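# The gradient scaling above follows the "inverting gradients" idea of
# Hausknecht & Stone: each action-gradient is rescaled by how much room the
# action has left before hitting its bound in the direction the gradient
# pushes it. A NumPy sketch of the same formula, with made-up values:
import numpy as np

action = np.array([0.5, -0.9])   # current actor outputs
grads = np.array([1.0, -2.0])    # dQ/da from the critic
low, high = -1.0, 1.0
width = high - low

pos = (grads >= 0) * (high - action)  # upward gradients scaled by headroom above
neg = (grads < 0) * (action - low)    # downward gradients scaled by headroom below
inverted = grads * (pos + neg) / width
print(inverted)                        # [ 0.25 -0.1 ]
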
def initModels(self):
    S_c = Input(shape=self.s_dim)
    A_c = Input(shape=(1,), dtype='uint8')
    G_c = Input(shape=self.g_dim)
    M_c = Input(shape=self.g_dim)
    TARGETS = Input(shape=(1,))
    layers, qvals = self.create_critic_network(S_c, G_c, M_c)
    self.qvalsModel = Model([S_c, G_c, M_c], qvals)
    self.qvals = K.function(inputs=[S_c, G_c, M_c], outputs=[qvals],
                            updates=None)
    actionFilter = K.squeeze(K.one_hot(A_c, self.a_dim), axis=1)
    qval = K.sum(actionFilter * qvals, axis=1, keepdims=True)
    self.qval = K.function(inputs=[S_c, G_c, M_c, A_c], outputs=[qval],
                           updates=None)
    loss_dqn = K.mean(K.square(qval - TARGETS), axis=0)
    inputs = [S_c, A_c, G_c, M_c, TARGETS]
    outputs = [loss_dqn, qval]
    self.updatesQval = Adam(lr=0.001).get_updates(
        params=self.qvalsModel.trainable_weights, loss=loss_dqn)
    self.trainCritic = K.function(inputs=inputs, outputs=outputs,
                                  updates=self.updatesQval)

    S_a = Input(shape=self.s_dim)
    G_a = Input(shape=self.g_dim)
    M_a = Input(shape=self.g_dim)
    probs = self.create_actor_network(S_a, G_a, M_a)
    self.probsModel = Model([S_a, G_a, M_a], probs)
    self.probs = K.function(inputs=[S_a, G_a, M_a], outputs=[probs],
                            updates=None)

    L1, L2, L3 = layers
    merged_input = concatenate([multiply([subtract([S_a, G_a]), M_a]), S_a])
    qvalTrain = L1(merged_input)
    qvalTrain = L2(qvalTrain)
    qvalTrain = L3(qvalTrain)
    val = K.sum(qvalTrain * probs, axis=1, keepdims=True)
    inputs = [S_a, G_a, M_a]
    outputs = [probs, qvalTrain, val]
    self.updatesActor = Adam(lr=0.001).get_updates(
        params=self.probsModel.trainable_weights, loss=-val)
    self.trainActor = K.function(inputs=inputs, outputs=outputs,
                                 updates=self.updatesActor)

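# The one-hot "action filter" above selects Q(s, a) for the taken action out
# of the vector of all action values. A NumPy sketch of the same selection,
# with made-up numbers:
import numpy as np

qvals = np.array([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0]])              # (batch, num_actions)
actions = np.array([2, 0])                       # taken actions
action_filter = np.eye(qvals.shape[1])[actions]  # one-hot rows
qval = np.sum(action_filter * qvals, axis=1, keepdims=True)
print(qval)                                      # [[3.], [4.]]
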
def create_critic_network(self, S, A, G=None, M=None):
    inputs = concatenate([multiply([subtract([S, G]), M]), S])
    L1 = Dense(400, activation="relu",
               kernel_initializer=lecun_uniform(),
               kernel_regularizer=l2(0.01))
    L1out = L1(inputs)
    L1out = concatenate([L1out, A])
    L2 = Dense(300, activation="relu",
               kernel_initializer=lecun_uniform(),
               kernel_regularizer=l2(0.01))
    L2out = L2(L1out)
    L3 = Dense(1, activation='linear',
               kernel_initializer=RandomUniform(minval=-3e-4, maxval=3e-4),
               kernel_regularizer=l2(0.01),
               bias_initializer=RandomUniform(minval=-3e-4, maxval=3e-4))
    qval = L3(L2out)
    return [L1, L2, L3], qval

def Model_sent2tag_MLP_1(sentvocabsize, tagvocabsize, sent_W, tag_W,
                         s2v_k, tag2v_k):
    input_sent = Input(shape=(1,), dtype='int32')
    sent_embedding = Embedding(input_dim=sentvocabsize,
                               output_dim=s2v_k,
                               input_length=1,
                               mask_zero=False,
                               trainable=False,
                               weights=[sent_W])(input_sent)
    input_tag = Input(shape=(1,), dtype='int32')
    tag_embedding = Embedding(input_dim=tagvocabsize,
                              output_dim=tag2v_k,
                              input_length=1,
                              mask_zero=False,
                              trainable=False,
                              weights=[tag_W])(input_tag)

    x1_1 = Flatten()(sent_embedding)
    x2_0 = Flatten()(tag_embedding)
    # x1_1 = Dense(100, activation='tanh')(x1_0)

    sub = subtract([x2_0, x1_1])
    mul = multiply([x2_0, x1_1])
    max_ = maximum([x2_0, x1_1])
    avg = average([x2_0, x1_1])

    class_input = concatenate([x2_0, x1_1, sub, mul, max_, avg], axis=-1)
    # class_input = Flatten()(class_input)
    class_mlp1 = Dense(200, activation='tanh')(class_input)
    class_mlp1 = Dropout(0.5)(class_mlp1)
    class_mlp2 = Dense(2)(class_mlp1)
    class_output = Activation('softmax', name='CLASS')(class_mlp2)

    # distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([mlp_x1_2, x2_0])
    # distance = dot([x1_0, x2_0], axes=-1, normalize=True)

    mymodel = Model([input_sent, input_tag], class_output)
    mymodel.compile(loss='categorical_crossentropy',
                    optimizer=optimizers.Adam(lr=0.001),
                    metrics=['acc'])
    return mymodel

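# A hedged usage sketch for Model_sent2tag_MLP_1: the random embedding tables
# and toy sizes below stand in for the real sentence/tag vectors, which are
# not part of the original code. Note subtract()/multiply() require the two
# embedding widths to match (s2v_k == tag2v_k).
import numpy as np

sent_W = np.random.rand(1000, 50)  # hypothetical sentence-vector table
tag_W = np.random.rand(200, 50)    # hypothetical tag-vector table
model = Model_sent2tag_MLP_1(sentvocabsize=1000, tagvocabsize=200,
                             sent_W=sent_W, tag_W=tag_W,
                             s2v_k=50, tag2v_k=50)
sent_ids = np.random.randint(0, 1000, size=(32, 1))
tag_ids = np.random.randint(0, 200, size=(32, 1))
labels = np.eye(2)[np.random.randint(0, 2, size=32)]  # one-hot class labels
model.fit([sent_ids, tag_ids], labels, epochs=1)
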
def load_model(number, nb_words, n_handcrafted_features):
    embedding_matrix = np.zeros((nb_words, GloVe_embedding_dim))
    embedding_layer = Embedding(nb_words,
                                GloVe_embedding_dim,
                                weights=[embedding_matrix],
                                input_length=max_sentence_length,
                                trainable=False)
    lstm_layer = Bidirectional(LSTM(100,
                                    recurrent_dropout=0.4,
                                    return_sequences=False),
                               merge_mode='mul')

    # sentence 1
    sequence_1_input = Input(shape=(max_sentence_length,), dtype="int32")
    embedded_sequences_1 = embedding_layer(sequence_1_input)
    s1 = lstm_layer(embedded_sequences_1)
    s1 = BatchNormalization()(s1)

    # sentence 2
    sequence_2_input = Input(shape=(max_sentence_length,), dtype="int32")
    embedded_sequences_2 = embedding_layer(sequence_2_input)
    s2 = lstm_layer(embedded_sequences_2)
    s2 = BatchNormalization()(s2)

    # handcrafted features
    nlp_input = Input(shape=(n_handcrafted_features,), dtype="float32")
    features_dense = BatchNormalization()(nlp_input)
    features_dense = Dense(100, activation="relu")(features_dense)
    features_dense = BatchNormalization()(features_dense)

    # computing cosine similarity
    csd = dot([s1, s2], axes=-1, normalize=True)
    # computing the element-wise multiplication of the two vectors
    mul_v = multiply([s1, s2])
    # compute the absolute difference
    x_y = subtract([s1, s2])
    merged = Lambda(lambda x: abs(x))(x_y)

    # merge the features
    merged = concatenate([merged, mul_v])
    merged = Dropout(0.3)(merged)
    merged = concatenate([merged, features_dense, csd])
    merged = BatchNormalization()(merged)
    merged = Dense(200, activation="relu")(merged)
    merged = Dropout(0.2)(merged)
    merged = BatchNormalization()(merged)
    out = Dense(2, activation="softmax")(merged)

    model = Model(inputs=[sequence_1_input, sequence_2_input, nlp_input],
                  outputs=out)
    model.compile(loss="binary_crossentropy", optimizer="nadam",
                  metrics=['acc'])
    best_model_path = "Kfold/best_model_" + str(number)
    model.load_weights(best_model_path)
    return model

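# The (|s1 - s2|, s1 * s2, cos(s1, s2)) feature set above is a common
# sentence-pair matching heuristic. A minimal sketch of just that merge block
# on two generic encoded vectors; the dimension (100) is an assumption:
from keras.layers import Input, Lambda, subtract, multiply, concatenate, dot
from keras.models import Model

v1 = Input(shape=(100,))
v2 = Input(shape=(100,))
abs_diff = Lambda(lambda x: abs(x))(subtract([v1, v2]))  # element-wise |v1 - v2|
prod = multiply([v1, v2])                                # element-wise product
cos = dot([v1, v2], axes=-1, normalize=True)             # cosine similarity
pair_features = concatenate([abs_diff, prod, cos])
feature_model = Model([v1, v2], pair_features)
feature_model.summary()
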
def _build_network(self, vocab_size, maxlen, emb_weights=[],
                   c_emb_weights=[], hidden_units=256, trainable=True,
                   batch_size=1):
    print('Building model...')

    # context branch
    context_input = Input(name='context', batch_shape=(batch_size, maxlen))
    if len(c_emb_weights) == 0:
        c_emb = Embedding(vocab_size, 256, input_length=maxlen,
                          embeddings_initializer='glorot_normal',
                          trainable=trainable)(context_input)
    else:
        c_emb = Embedding(vocab_size, c_emb_weights.shape[1],
                          input_length=maxlen, weights=[c_emb_weights],
                          trainable=trainable)(context_input)
    c_lstm1 = LSTM(hidden_units,
                   kernel_initializer='he_normal',
                   recurrent_initializer='orthogonal',
                   bias_initializer='he_normal',
                   activation='sigmoid',
                   recurrent_activation='sigmoid',
                   kernel_regularizer=regularizers.l2(0.01),
                   activity_regularizer=regularizers.l2(0.01),
                   recurrent_regularizer=regularizers.l2(0.01),
                   dropout=0.25, recurrent_dropout=0.0,
                   unit_forget_bias=False,
                   return_sequences=False)(c_emb)
    c_lstm2 = LSTM(hidden_units,
                   kernel_initializer='he_normal',
                   recurrent_initializer='orthogonal',
                   bias_initializer='he_normal',
                   activation='sigmoid',
                   recurrent_activation='sigmoid',
                   kernel_regularizer=regularizers.l2(0.01),
                   activity_regularizer=regularizers.l2(0.01),
                   recurrent_regularizer=regularizers.l2(0.01),
                   dropout=0.25, recurrent_dropout=0.0,
                   unit_forget_bias=False,
                   return_sequences=False, go_backwards=True)(c_emb)
    c_merged = add([c_lstm1, c_lstm2])
    c_merged = Dropout(0.25)(c_merged)

    # text branch
    text_input = Input(name='text', batch_shape=(batch_size, maxlen))
    if len(emb_weights) == 0:
        emb = Embedding(vocab_size, 256, input_length=maxlen,
                        embeddings_initializer='glorot_normal',
                        trainable=trainable)(text_input)
    else:
        emb = Embedding(vocab_size, emb_weights.shape[1],
                        input_length=maxlen, weights=[emb_weights],
                        trainable=trainable)(text_input)
    t_lstm1 = LSTM(hidden_units,
                   kernel_initializer='he_normal',
                   recurrent_initializer='he_normal',
                   bias_initializer='he_normal',
                   activation='sigmoid',
                   recurrent_activation='sigmoid',
                   kernel_regularizer=regularizers.l2(0.01),
                   activity_regularizer=regularizers.l2(0.01),
                   recurrent_regularizer=regularizers.l2(0.01),
                   dropout=0.25, recurrent_dropout=0.25,
                   unit_forget_bias=False,
                   return_sequences=False)(emb)
    t_lstm2 = LSTM(hidden_units,
                   kernel_initializer='he_normal',
                   recurrent_initializer='he_normal',
                   bias_initializer='he_normal',
                   activation='sigmoid',
                   recurrent_activation='sigmoid',
                   kernel_regularizer=regularizers.l2(0.01),
                   activity_regularizer=regularizers.l2(0.01),
                   recurrent_regularizer=regularizers.l2(0.01),
                   dropout=0.25, recurrent_dropout=0.25,
                   unit_forget_bias=False,
                   return_sequences=False, go_backwards=True)(emb)
    t_merged = add([t_lstm1, t_lstm2])
    t_merged = Dropout(0.25)(t_merged)

    # difference between the context and text encodings
    merged = subtract([c_merged, t_merged])
    dnn_1 = Dense(hidden_units, kernel_initializer="he_normal",
                  activation='sigmoid')(merged)
    dnn_1 = Dropout(0.25)(dnn_1)
    dnn_2 = Dense(2, activation='sigmoid')(dnn_1)
    softmax = Activation('softmax')(dnn_2)

    model = Model(inputs=[context_input, text_input], outputs=softmax)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    print('Number of parameters:', model.count_params())
    print(model.summary())
    return model

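# The paired-LSTM pattern above (one reading forward, one with
# go_backwards=True, merged with add) is a hand-rolled bidirectional encoder.
# A stripped-down sketch of the same pattern; vocabulary size, sequence
# length, and unit counts are made up:
from keras.layers import Input, Embedding, LSTM, add
from keras.models import Model

tokens = Input(shape=(30,))
emb = Embedding(5000, 64)(tokens)
fwd = LSTM(128)(emb)                     # reads left to right
bwd = LSTM(128, go_backwards=True)(emb)  # reads right to left
encoding = add([fwd, bwd])               # element-wise sum of both directions
encoder = Model(tokens, encoding)
encoder.summary()
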
def __init__(self, unit=64, dropout=0.2, max_len=39, update_num=3,
             regularization=0.1, embedding_matrix=None, use_cudnn=False,
             use_share=False, use_one_cell=False):
    self.unit = unit
    self.dropout = dropout
    self.use_share = use_share
    self.use_one_cell = use_one_cell
    self.regularization = l2(regularization)

    Q1_input = Input(shape=(max_len,), dtype='int32', name='Q1')  # (?, L)
    Q2_input = Input(shape=(max_len,), dtype='int32', name='Q2')  # (?, L)
    # Q1_m = Input(shape=(max_len,), dtype='int32', name='mask1')
    # Q2_m = Input(shape=(max_len,), dtype='int32', name='mask2')
    # magic = Input(shape=(4,), dtype='float32', name='magic')

    embedding = Embedding(input_dim=embedding_matrix.shape[0],
                          output_dim=embedding_matrix.shape[1],
                          mask_zero=True,
                          weights=[embedding_matrix],
                          trainable=False)
    # bn = BatchNormalization()
    Q1 = embedding(Q1_input)
    Q2 = embedding(Q2_input)

    GRULayer = CuDNNGRU if use_cudnn else GRU
    for i in range(update_num):
        Q1, Q2 = self.update_module(Q1, Q2, GRULayer)
    Q1, Q2 = self.attention(Q1, Q2, GRULayer, implementation=3)

    # bn1 = BatchNormalization()
    # bnm = BatchNormalization()
    # bns = BatchNormalization()
    # regression = Bilinear(implementation=0, activation='tanh')([Q1, Q2])
    att = SelfAttention(1, activation='tanh')
    Q1 = att(Q1)
    Q2 = att(Q2)

    vector = concatenate([
        # Q1, Q2,
        merge.multiply([Q1, Q2]),
        # merge.subtract([Q1, Q2], use_abs=True),
        merge.subtract([Q1, Q2]),
        merge.average([Q1, Q2])
    ])
    # vector = merge.subtract([Q1, Q2])
    # vector = merge.add([Q1, Q2])
    # vector = Dropout(self.dropout)(vector)
    # vector = Dense(units=512, activation='tanh')(vector)
    # magic_new = Dense(units=64, activation='tanh')(magic)
    # vector = concatenate([vector, magic_new])
    vector = Dropout(self.dropout)(vector)
    vector = Dense(units=256, activation='tanh')(vector)
    vector = Dropout(self.dropout)(vector)
    regression = Dense(units=1, activation='sigmoid')(vector)

    super(IAM, self).__init__(inputs=[Q1_input, Q2_input], outputs=regression)

def train_wgan_with_grad_penalty(prior_gen, generator, data_gen, critic,
                                 batch_size, epochs, batches_per_epoch=100,
                                 optimizer=Adam(lr=1e-4, beta_1=0, beta_2=0.9),
                                 grad_pen_coef=10., critic_gen_train_ratio=2,
                                 callbacks=None):
    # build model to train the critic
    data_shape = critic.input_shape[1:]
    real_critic_input = Input(shape=data_shape, name='real_in')
    fake_critic_input = Input(shape=data_shape, name='fake_in')
    interp_critic_input = Input(shape=data_shape, name='interp_in')
    real_critic_score = critic(real_critic_input)
    fake_critic_score = critic(fake_critic_input)
    interp_critic_score = critic(interp_critic_input)
    critic_loss = subtract([fake_critic_score, real_critic_score])
    gradient_penalty = GradPenLayer()(
        [interp_critic_input, interp_critic_score])
    critic_train_mdl = Model(
        [real_critic_input, fake_critic_input, interp_critic_input],
        [critic_loss, gradient_penalty])
    critic_train_mdl.compile(optimizer=optimizer,
                             loss=lambda y_true, y_pred: y_pred,
                             loss_weights=[1., grad_pen_coef])

    # build model to train the generator
    prior_input = Input(shape=generator.input_shape[1:], name='prior_in')
    critic.trainable = False
    critic_on_generator_score = critic(generator(prior_input))
    generator_train_mdl = Model(prior_input, critic_on_generator_score)
    generator_train_mdl.compile(optimizer=optimizer,
                                loss=lambda y_true, y_pred: -y_pred)

    # init callbacks
    callbacks = callbacks or []
    callbacks = CallbackList(callbacks)
    callbacks.set_model({'generator': generator, 'critic': critic})
    callbacks.set_params({
        'batch_size': batch_size,
        'epochs': epochs,
        'steps': batches_per_epoch,
        'samples': batches_per_epoch * batch_size,
        'prior_gen': prior_gen,
        'data_gen': data_gen,
    })

    # train
    print('Training on {} samples for {} epochs'.format(
        batches_per_epoch * batch_size, epochs))
    callbacks.on_train_begin()
    for e in range(epochs):
        print('Epoch {}/{}'.format(e + 1, epochs))
        callbacks.on_epoch_begin(e)
        progbar = Progbar(target=batches_per_epoch * batch_size)
        dummy_y = np.array([None] * batch_size)
        for b in range(batches_per_epoch):
            callbacks.on_batch_begin(b)
            batch_losses = np.zeros(shape=3)
            for critic_upd in range(critic_gen_train_ratio):
                real_batch = data_gen(batch_size)
                fake_batch = generator.predict(prior_gen(batch_size))
                # interpolate between real and fake samples for the penalty term
                weights = np.random.uniform(size=batch_size)
                weights = weights.reshape(
                    (-1,) + (1,) * (len(real_batch.shape) - 1))
                interp_batch = weights * real_batch + (1. - weights) * fake_batch
                x_batch = {'real_in': real_batch,
                           'fake_in': fake_batch,
                           'interp_in': interp_batch}
                cur_losses = np.array(
                    critic_train_mdl.train_on_batch(x=x_batch,
                                                    y=[dummy_y, dummy_y]))
                batch_losses += cur_losses
            generator_train_mdl.train_on_batch(x=prior_gen(batch_size),
                                               y=dummy_y)
            losses_names = ('total_loss', 'critic_loss', 'gradient_pen')
            progbar.add(batch_size, zip(losses_names, batch_losses))
            callbacks.on_batch_end(b)
        progbar.update(batches_per_epoch * batch_size)
        callbacks.on_epoch_end(e)
    callbacks.on_train_end()

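# A hedged sketch of how train_wgan_with_grad_penalty might be invoked: the
# tiny generator/critic, the Gaussian prior, and the data sampler below are
# all made up for illustration (GradPenLayer is assumed to be defined
# alongside the function above).
import numpy as np
from keras.layers import Dense
from keras.models import Sequential

latent_dim, data_dim = 8, 2
generator = Sequential([Dense(32, activation='relu', input_shape=(latent_dim,)),
                        Dense(data_dim)])
critic = Sequential([Dense(32, activation='relu', input_shape=(data_dim,)),
                     Dense(1)])  # no sigmoid: a WGAN critic outputs a raw score

prior_gen = lambda n: np.random.normal(size=(n, latent_dim))
data_gen = lambda n: np.random.multivariate_normal([2., 2.], np.eye(2), size=n)

train_wgan_with_grad_penalty(prior_gen, generator, data_gen, critic,
                             batch_size=64, epochs=5)
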
def deep_neural_net_gru(train_data_1, train_data_2, train_data_3,
                        train_labels, test_data_1, test_data_2, test_data_3,
                        test_labels, max_len, len_chars, bidirectional,
                        hidden_units, n):
    early_stop = EarlyStopping(monitor='loss', patience=2, verbose=1)
    checkpointer = ModelCheckpoint(
        filepath="/home/amarinho/data-amarinho/checkpoint" + str(n) + ".hdf5",
        verbose=1, save_best_only=True)

    lstm1 = GRU(hidden_units, implementation=2, return_sequences=True,
                name='lstm1')
    lstm2 = GRU(hidden_units, implementation=2, return_sequences=True,
                name='lstm2')
    lstm3 = GRU(hidden_units, implementation=2, return_sequences=True,
                name='lstm3')
    lstm1 = Bidirectional(lstm1, name='bilstm1')
    lstm2 = Bidirectional(lstm2, name='bilstm2')
    lstm3 = Bidirectional(lstm3, name='bilstm3')

    input_word1 = Input(shape=(max_len, len_chars))
    input_word2 = Input(shape=(max_len, len_chars))
    input_feature = Input(shape=(max_len,))

    mask = Masking(mask_value=0, input_shape=(max_len, len_chars))(input_word1)
    l1 = lstm1(mask)
    l1 = Dropout(0.01)(l1)
    l1 = MaxPooling1DMasked(pool_size=1, name='maxpooling')(l1)
    input_concat = concatenate([l1, mask])
    l2 = lstm2(input_concat)
    l2 = Dropout(0.01)(l2)
    l2 = MaxPooling1DMasked(pool_size=1, name='maxpooling2')(l2)
    input_concat = concatenate([mask, l2])
    l3 = lstm3(input_concat)
    l3 = Dropout(0.01)(l3)
    l3 = MaxPooling1DMasked(pool_size=1, name='maxpooling3')(l3)
    final_input_concat = concatenate([l1, l2, l3], axis=1)
    final_input_concat = Flatten()(final_input_concat)
    SentenceEncoder = Model(input_word1, final_input_concat)

    word1_representation = SentenceEncoder(input_word1)
    word2_representation = SentenceEncoder(input_word2)
    concat = concatenate([word1_representation, word2_representation])
    mul = multiply([word1_representation, word2_representation])
    sub = subtract([word1_representation, word2_representation])
    final_merge = concatenate([concat, mul, sub, input_feature])

    dropout3 = Dropout(0.01)(final_merge)
    dense1 = Dense(hidden_units * 2, activation='relu',
                   name='dense1')(dropout3)
    dropout4 = Dropout(0.01)(dense1)
    # flatten = Flatten()(dense1)
    # dropout5 = Dropout(0.01)(flatten)
    dense2 = Dense(1, activation='sigmoid', name='dense2')(dropout4)
    final_model = Model([input_word1, input_word2, input_feature], dense2)
    print(final_model.summary())

    print('Compiling...')
    final_model.compile(optimizer='adam', loss='binary_crossentropy',
                        metrics=['accuracy'])
    print('Fitting...')
    final_model.fit([train_data_1, train_data_2, train_data_3], train_labels,
                    verbose=0,
                    validation_data=([test_data_1, test_data_2, test_data_3],
                                     test_labels),
                    callbacks=[checkpointer, early_stop], epochs=20)
    start_time = time.time()
    aux1 = final_model.predict([test_data_1, test_data_2, test_data_3],
                               verbose=0)
    aux = (aux1 > 0.5).astype('int32').ravel()
    return aux, (time.time() - start_time)

s2 = BatchNormalization()(s2)

# define an input for handcrafted features
nlp_input = Input(shape=(train_nlp_features.shape[1],), dtype="float32")
features_dense = BatchNormalization()(nlp_input)
features_dense = Dense(100, activation="relu")(features_dense)
features_dense = BatchNormalization()(features_dense)

# computing cosine similarity
csd = dot([s1, s2], axes=-1, normalize=True)
# computing the element-wise multiplication of the two vectors
mul_v = multiply([s1, s2])
# compute the absolute difference
x_y = subtract([s1, s2])
merged = Lambda(lambda x: abs(x))(x_y)

# merge the features
merged = concatenate([merged, mul_v])
merged = Dropout(0.3)(merged)
# final features for each pair of sentences
merged = concatenate([merged, features_dense, csd])
merged = BatchNormalization()(merged)
merged = Dense(200, activation="relu")(merged)
merged = Dropout(0.2)(merged)
merged = BatchNormalization()(merged)
# using a softmax classifier

def deep_neural_net_gru(train_data_1, train_data_2, train_labels,
                        test_data_1, test_data_2, test_labels, max_len,
                        len_chars, bidirectional, hidden_units,
                        selfattention, maxpooling, alignment, shortcut,
                        multiplerlu, onlyconcat, n):
    early_stop = EarlyStopping(monitor='loss', patience=2, verbose=1)
    checkpointer = ModelCheckpoint(
        filepath="/home/amarinho/data-amarinho/checkpoint" + str(n) + ".hdf5",
        verbose=1, save_best_only=True)

    gru1 = GRU(hidden_units, implementation=2, return_sequences=True,
               name='gru1')
    gru2 = GRU(hidden_units, implementation=2,
               return_sequences=(alignment or selfattention or maxpooling),
               name='gru2')
    gru1 = Bidirectional(gru1, name='bigru1')
    gru2 = Bidirectional(gru2, name='bigru2')

    input_word1 = Input(shape=(max_len, len_chars))
    input_word2 = Input(shape=(max_len, len_chars))
    mask = Masking(mask_value=0, input_shape=(max_len, len_chars))(input_word1)
    g1 = gru1(mask)
    g1 = Dropout(0.01)(g1)
    if shortcut:
        # shortcut connections
        shortcut_con = concatenate([g1, mask])
        g2 = gru2(shortcut_con)
    else:
        g2 = gru2(g1)
    g2 = Dropout(0.01)(g2)
    if selfattention:
        # self-attention
        g2 = Attention()(g2)
    elif maxpooling:
        # max pooling
        g2 = GlobalMaxPooling1DMasked(name='maxpooling')(g2)
    SentenceEncoder = Model(input_word1, g2)
    print(SentenceEncoder.summary())

    word1_representation = SentenceEncoder(input_word1)
    word2_representation = SentenceEncoder(input_word2)
    if alignment:
        att1 = AlignmentAttentionLayer()(
            [word1_representation, word2_representation])
        att2 = AlignmentAttentionLayer()(
            [word2_representation, word1_representation])
        concat = concatenate([att1, att2])
        mul = multiply([att1, att2])
        sub = subtract([att1, att2])
    else:
        concat = concatenate([word1_representation, word2_representation])
        mul = multiply([word1_representation, word2_representation])
        sub = subtract([word1_representation, word2_representation])
    final_merge = concatenate([concat, mul, sub])

    dropout3 = Dropout(0.01)(final_merge)
    dense1 = Dense(hidden_units, activation='relu', name='dense1')(dropout3)
    dropout4 = Dropout(0.01)(dense1)
    # dropout4 = Reshape((2400,))(dropout4)
    dense2 = Dense(1, activation='sigmoid', name='dense2')(dropout4)
    final_model = Model([input_word1, input_word2], dense2)
    final_model.summary()

    print('Compiling...')
    final_model.compile(optimizer='adam', loss='binary_crossentropy',
                        metrics=['accuracy'])
    print('Fitting...')
    final_model.fit([train_data_1, train_data_2], train_labels, verbose=0,
                    validation_data=([test_data_1, test_data_2], test_labels),
                    callbacks=[checkpointer, early_stop], epochs=20)
    start_time = time.time()
    aux1 = final_model.predict([test_data_1, test_data_2], verbose=0)
    aux = (aux1 > 0.5).astype('int32').ravel()
    return aux, (time.time() - start_time)

def Model3_LSTM_BiLSTM_LSTM(wordvocabsize, targetvocabsize, charvobsize,
                            word_W, char_W, input_fragment_lenth,
                            input_leftcontext_lenth, input_rightcontext_lenth,
                            input_maxword_length, w2v_k, c2v_k,
                            hidden_dim=200, batch_size=32,
                            optimizer='rmsprop'):
    hidden_dim = 100

    # fragment: word embeddings
    word_input_fragment = Input(shape=(input_fragment_lenth,), dtype='int32')
    word_embedding_fragment = Embedding(input_dim=wordvocabsize + 1,
                                        output_dim=w2v_k,
                                        input_length=input_fragment_lenth,
                                        mask_zero=False,
                                        trainable=True,
                                        weights=[word_W])(word_input_fragment)
    word_embedding_fragment = Dropout(0.5)(word_embedding_fragment)

    # fragment: character-level CNN
    char_input_fragment = Input(shape=(input_fragment_lenth,
                                       input_maxword_length),
                                dtype='int32')
    char_embedding_fragment = TimeDistributed(
        Embedding(input_dim=charvobsize,
                  output_dim=c2v_k,
                  batch_input_shape=(batch_size, input_fragment_lenth,
                                     input_maxword_length),
                  mask_zero=False,
                  trainable=True,
                  weights=[char_W]))(char_input_fragment)
    char_cnn_fragment = TimeDistributed(
        Conv1D(50, 3, activation='relu', padding='valid'))
    char_embedding_fragment = char_cnn_fragment(char_embedding_fragment)
    char_embedding_fragment = TimeDistributed(
        GlobalMaxPooling1D())(char_embedding_fragment)
    char_embedding_fragment = Dropout(0.25)(char_embedding_fragment)

    # left context
    word_input_leftcontext = Input(shape=(input_leftcontext_lenth,),
                                   dtype='int32')
    word_embedding_leftcontext = Embedding(
        input_dim=wordvocabsize + 1,
        output_dim=w2v_k,
        input_length=input_leftcontext_lenth,
        mask_zero=True,
        trainable=True,
        weights=[word_W])(word_input_leftcontext)
    word_embedding_leftcontext = Dropout(0.5)(word_embedding_leftcontext)

    char_input_leftcontext = Input(shape=(input_leftcontext_lenth,
                                          input_maxword_length),
                                   dtype='int32')
    char_input_rightcontext = Input(shape=(input_rightcontext_lenth,
                                           input_maxword_length),
                                    dtype='int32')

    # right context
    word_input_rightcontext = Input(shape=(input_rightcontext_lenth,),
                                    dtype='int32')
    word_embedding_rightcontext = Embedding(
        input_dim=wordvocabsize + 1,
        output_dim=w2v_k,
        input_length=input_rightcontext_lenth,
        mask_zero=True,
        trainable=True,
        weights=[word_W])(word_input_rightcontext)
    word_embedding_rightcontext = Dropout(0.5)(word_embedding_rightcontext)

    embedding_fragment = concatenate(
        [word_embedding_fragment, char_embedding_fragment], axis=-1)
    embedding_leftcontext = word_embedding_leftcontext
    embedding_rightcontext = word_embedding_rightcontext

    LSTM_leftcontext = LSTM(hidden_dim, go_backwards=False,
                            activation='tanh')(embedding_leftcontext)
    Rep_LSTM_leftcontext = RepeatVector(input_fragment_lenth)(LSTM_leftcontext)
    LSTM_rightcontext = LSTM(hidden_dim, go_backwards=True,
                             activation='tanh')(embedding_rightcontext)
    Rep_LSTM_rightcontext = RepeatVector(input_fragment_lenth)(
        LSTM_rightcontext)
    BiLSTM_fragment = Bidirectional(LSTM(hidden_dim // 2,
                                         activation='tanh',
                                         return_sequences=True),
                                    merge_mode='concat')(embedding_fragment)

    # combine the repeated context vectors with the per-timestep fragment
    # representation (the RepeatVector outputs align the shapes)
    context_ADD = add([Rep_LSTM_leftcontext, BiLSTM_fragment,
                       Rep_LSTM_rightcontext])
    context_subtract_l = subtract([BiLSTM_fragment, Rep_LSTM_leftcontext])
    context_subtract_r = subtract([BiLSTM_fragment, Rep_LSTM_rightcontext])
    context_average = average([Rep_LSTM_leftcontext, BiLSTM_fragment,
                               Rep_LSTM_rightcontext])
    context_maximum = maximum([Rep_LSTM_leftcontext, BiLSTM_fragment,
                               Rep_LSTM_rightcontext])
    embedding_mix = concatenate([
        embedding_fragment, BiLSTM_fragment, context_ADD,
        context_subtract_l, context_subtract_r, context_average,
        context_maximum
    ], axis=-1)
    # BiLSTM_fragment = Bidirectional(LSTM(hidden_dim // 2, activation='tanh'), merge_mode='concat')(embedding_fragment)

    # multi-width convolutional decoder over the mixed representation
    decoderlayer1 = Conv1D(50, 1, activation='relu', strides=1,
                           padding='same')(embedding_mix)
    decoderlayer2 = Conv1D(50, 2, activation='relu', strides=1,
                           padding='same')(embedding_mix)
    decoderlayer3 = Conv1D(50, 3, activation='relu', strides=1,
                           padding='same')(embedding_mix)
    decoderlayer4 = Conv1D(50, 4, activation='relu', strides=1,
                           padding='same')(embedding_mix)
    CNNs_fragment = concatenate(
        [decoderlayer1, decoderlayer2, decoderlayer3, decoderlayer4], axis=-1)
    CNNs_fragment = Dropout(0.5)(CNNs_fragment)
    CNNs_fragment = GlobalMaxPooling1D()(CNNs_fragment)

    concat = Dropout(0.3)(CNNs_fragment)
    output = Dense(targetvocabsize, activation='softmax')(concat)

    Models = Model([word_input_fragment, word_input_leftcontext,
                    word_input_rightcontext, char_input_fragment,
                    char_input_leftcontext, char_input_rightcontext], output)
    Models.compile(loss='categorical_crossentropy',
                   optimizer=optimizers.RMSprop(lr=0.001),
                   metrics=['acc'])
    return Models

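# The context mix above relies on RepeatVector to tile each context vector
# across the fragment length so that add/subtract/average/maximum see
# matching (timesteps, features) shapes. A minimal sketch of that shape
# alignment with made-up sizes:
from keras.layers import Input, LSTM, RepeatVector, subtract
from keras.models import Model

fragment = Input(shape=(7, 100))  # (timesteps, features), e.g. a BiLSTM output
context = Input(shape=(5, 64))    # separate context sequence
ctx_vec = LSTM(100)(context)      # (batch, 100): one vector per sample
ctx_rep = RepeatVector(7)(ctx_vec)       # (batch, 7, 100): tiled per timestep
diff = subtract([fragment, ctx_rep])     # shapes now line up element-wise
m = Model([fragment, context], diff)
m.summary()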