def BiGRU(input_tensor, units=64, use_gpu=True):
    """
    Bi-GRU
    :param input_tensor:
    :param units:
    :param use_gpu: if true, use CuDNNGRU to accelerate computing.
    :return:
    """
    if use_gpu:
        GRU = layers.CuDNNGRU
    else:
        GRU = layers.GRU
    gru1 = layers.Bidirectional(GRU(units, return_sequences=True,
                                    kernel_initializer='he_normal', name='gru1'),
                                merge_mode='sum')(input_tensor)
    x = layers.Bidirectional(GRU(units, return_sequences=True,
                                 kernel_initializer='he_normal', name='gru2'),
                             merge_mode='concat')(gru1)
    x = layers.TimeDistributed(layers.Dense(units=units * 2, activation='relu'), name='fc')(x)
    x = layers.TimeDistributed(layers.Dropout(0.3), name='dropout')(x)
    return x
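# Usage sketch (an illustration, not part of the original): BiGRU returns a 3-D
# sequence tensor, so a pooling layer and a classification head are added here
# for demonstration. Assumes TF 1.x-style keras (where layers.CuDNNGRU exists)
# and a `Model` import as in the other snippets; shape values are illustrative.
inputs = layers.Input(shape=(100, 300))       # (timesteps, features)
seq = BiGRU(inputs, units=64, use_gpu=False)  # use_gpu=False falls back to layers.GRU
pooled = layers.GlobalMaxPooling1D()(seq)
outputs = layers.Dense(1, activation='sigmoid')(pooled)
model = Model(inputs, outputs)
model.compile(optimizer='adam', loss='binary_crossentropy')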
def BiLSTM(input_tensor, units=64, use_gpu=False):
    """
    Bi-LSTM
    :param input_tensor:
    :param units:
    :param use_gpu: if true, use CuDNNLSTM to accelerate computing.
    :return:
    """
    if use_gpu:
        LSTM = layers.CuDNNLSTM
    else:
        LSTM = layers.LSTM
    lstm1 = layers.Bidirectional(LSTM(units, return_sequences=True,
                                      kernel_initializer='he_normal', name='lstm1'),
                                 merge_mode='sum')(input_tensor)
    x = layers.Bidirectional(LSTM(units, return_sequences=True,
                                  kernel_initializer='he_normal', name='lstm2'),
                             merge_mode='concat')(lstm1)
    x = layers.TimeDistributed(layers.Dense(units=units * 2, activation='relu'), name='fc')(x)
    x = layers.TimeDistributed(layers.Dropout(0.3), name='dropout')(x)
    return x
def custom_model_fn(features, labels, mode):
    """Used to build a TF custom estimator"""
    # embedding_matrix, embedding_initializer and self.num_neurons are captured
    # from the enclosing scope (this model_fn is defined inside a class method).
    embedded_input = tf.contrib.layers.embed_sequence(features['sequence'],
                                                      embedding_matrix.shape[0],
                                                      embedding_matrix.shape[1],
                                                      initializer=embedding_initializer,
                                                      trainable=False)
    first_gru = layers.CuDNNGRU(self.num_neurons, return_sequences=True)
    gru_output = layers.Bidirectional(first_gru)(embedded_input)
    gru_output = layers.Bidirectional(layers.CuDNNGRU(self.num_neurons))(gru_output)
    logits = layers.Dense(6)(gru_output)
    predicted_classes = tf.argmax(logits, 1)
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'class_ids': predicted_classes[:, tf.newaxis],
            'probabilities': tf.nn.sigmoid(logits),
            'logits': logits,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    loss = tf.losses.sigmoid_cross_entropy(labels, logits)
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss)
    assert mode == tf.estimator.ModeKeys.TRAIN
    optimizer = tf.train.AdamOptimizer()
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
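# Hedged sketch of wiring the model_fn into the TF 1.x Estimator API.
# train_input_fn is hypothetical; it should yield ({'sequence': token_ids}, labels).
estimator = tf.estimator.Estimator(model_fn=custom_model_fn, model_dir='/tmp/bigru')
estimator.train(input_fn=train_input_fn, steps=1000)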
def build_bigru_model(self, embedding_matrix) -> Tuple[Model, Model]:
    """
    build and return a multi-headed BiGRU model with
    1) an MLM output from the first GRU layer
    2) a standard toxicity classification output from the second
    :param embedding_matrix:
    :return:
    """
    token_input = layers.Input(shape=(self.max_seq_len,))
    embedding_layer = layers.Embedding(self.vocab_size + 1,
                                       self.embedding_dims,
                                       weights=[embedding_matrix],
                                       trainable=False)
    embedded_input = embedding_layer(token_input)
    gru1_output = layers.Bidirectional(
        layers.CuDNNGRU(self.num_neurons, return_sequences=True))(embedded_input)
    aux_output = layers.Dense(self.vocab_size + 1, activation='softmax',
                              name='aux_output')(gru1_output)
    gru2_output = layers.Bidirectional(layers.CuDNNGRU(self.num_neurons))(gru1_output)
    main_output = layers.Dense(6, activation='sigmoid', name='main_output')(gru2_output)

    training_model = Model(inputs=token_input, outputs=[main_output, aux_output])
    mlm_loss = MaskedPenalizedSparseCategoricalCrossentropy(CONFIDENCE_PENALTY)
    training_model.compile(optimizer=optimizers.Adam(),
                           loss={'main_output': MaskedBinaryCrossedentropy(),
                                 'aux_output': mlm_loss})

    inference_model = Model(inputs=token_input, outputs=main_output)

    print('generated bigru model...')
    training_model.summary()  # summary() prints itself and returns None
    return training_model, inference_model
def build_bigru_model(self, embedding_matrix) -> Model:
    """
    build and return a BiGRU model using a standard optimizer and loss
    :param embedding_matrix:
    :return:
    """
    token_input = layers.Input(shape=(self.max_seq_len,))
    embedding_layer = layers.Embedding(self.vocab_size + 1,
                                       self.embedding_dims,
                                       weights=[embedding_matrix],
                                       trainable=False)
    embedded_input = embedding_layer(token_input)
    gru_output = layers.Bidirectional(
        layers.CuDNNGRU(self.num_neurons, return_sequences=True))(embedded_input)
    gru_output = layers.Bidirectional(layers.CuDNNGRU(self.num_neurons))(gru_output)
    dense_output = layers.Dense(6, activation='sigmoid')(gru_output)

    bigru_model = Model(token_input, dense_output)
    bigru_model.compile(optimizer=optimizers.Adam(), loss=losses.binary_crossentropy)

    print('generated bigru model...')
    return bigru_model
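# Hedged usage sketch: `builder` is a hypothetical instance of the enclosing
# class (assumed to define vocab_size, max_seq_len, embedding_dims and
# num_neurons); the arrays are random placeholders for token ids and the six
# binary labels the sigmoid head expects.
import numpy as np
X = np.random.randint(0, builder.vocab_size + 1, size=(32, builder.max_seq_len))
y = np.random.randint(0, 2, size=(32, 6)).astype('float32')
bigru_model = builder.build_bigru_model(embedding_matrix)
bigru_model.fit(X, y, batch_size=8, epochs=1)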
def __init__(self, embedding, hidden_size, batch_size):
    super(Encoder, self).__init__()
    self.embedding = embedding
    self.batch_size = batch_size
    self.hidden_size = hidden_size
    self.bilstm = layers.Bidirectional(
        layers.CuDNNLSTM(self.hidden_size, return_sequences=True, return_state=True),
        merge_mode='concat')
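# Hedged sketch of calling this encoder's bilstm (`encoder` and `embedded_batch`
# are illustrative names): with return_state=True, a bidirectional LSTM returns
# the sequence output plus four state tensors (forward h, forward c, backward h,
# backward c).
output, fw_h, fw_c, bw_h, bw_c = encoder.bilstm(embedded_batch)
state_h = layers.Concatenate()([fw_h, bw_h])  # one common way to merge the states
state_c = layers.Concatenate()([fw_c, bw_c])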
def build_bigru_model(self) -> Model:
    """
    build and return a BiGRU model using a standard optimizer and loss
    :return:
    """
    embedded_input = layers.Input(shape=(None, self.embedding_dims))
    gru_output = layers.Bidirectional(
        layers.CuDNNGRU(self.num_neurons, return_sequences=True))(embedded_input)
    gru_output = layers.Bidirectional(layers.CuDNNGRU(self.num_neurons))(gru_output)
    dense_output = layers.Dense(6, activation='sigmoid')(gru_output)

    bigru_model = Model(embedded_input, dense_output)
    bigru_model.compile(optimizer=optimizers.Adam(), loss=losses.binary_crossentropy)

    print('generated bigru model...')
    bigru_model.summary()  # summary() prints itself and returns None
    return bigru_model
def _get_keras_model(self) -> models.Model:
    I = layers.Input(shape=(None, self._embedding_size),
                     dtype='float32',
                     name=base_model.TOKENS_FEATURE_KEY)

    # Bidirectional GRU: each layer consumes the previous layer's output (H)
    # so that the GRUs actually stack.
    H = I
    for num_units in self.hparams().gru_units:
        H = layers.Bidirectional(layers.GRU(num_units, return_sequences=True))(H)

    # Attention
    last_gru_units = self.hparams().gru_units[-1] * 2  # x2 because bidirectional
    A = layers.TimeDistributed(
        layers.Dense(self.hparams().attention_units, activation='relu'),
        input_shape=(None, last_gru_units))(H)
    A = layers.TimeDistributed(layers.Dense(1))(A)
    A = layers.Flatten()(A)
    A = layers.Activation('softmax')(A)

    # Dense
    X = layers.Dot((1, 1))([H, A])
    X = layers.Flatten()(X)
    for num_units in self.hparams().dense_units:
        X = layers.Dense(num_units, activation='relu')(X)
        X = layers.Dropout(self.hparams().dropout_rate)(X)

    # Outputs
    outputs = []
    for label in self._labels:
        outputs.append(layers.Dense(1, activation='sigmoid', name=label)(X))

    model = models.Model(inputs=I, outputs=outputs)
    model.compile(optimizer=optimizers.Adam(lr=self.hparams().learning_rate),
                  loss='binary_crossentropy',
                  metrics=['binary_accuracy', super().roc_auc])

    model.summary(print_fn=tf.logging.info)  # summary() returns None, so log via print_fn
    return model
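# Note on the attention block above: after the softmax, A has shape
# (batch, timesteps), and Dot(axes=(1, 1)) contracts the time axis of
# H (batch, timesteps, 2 * units) against it, yielding a (batch, 2 * units)
# attention-weighted summary of the GRU states.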
def __init__(self,
             units,
             use_bias=True,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             bias_initializer='zeros',
             unit_forget_bias=True,
             dropout=0.,
             return_sequences=False,
             return_state=False,
             go_backwards=False,
             stateful=False,
             num_layers=1,
             bidirectional=False,
             **kwargs):
    super(LSTM, self).__init__(**kwargs)
    assert num_layers == 1, "Only support single layer for CuDNN RNN in keras"
    self._rnn = layers.LSTM(
        # cuDNN requirement
        activation='tanh',
        recurrent_activation='sigmoid',
        recurrent_dropout=0,
        unroll=False,
        use_bias=use_bias,
        # free arguments
        units=units,
        kernel_initializer=kernel_initializer,
        recurrent_initializer=recurrent_initializer,
        bias_initializer=bias_initializer,
        unit_forget_bias=unit_forget_bias,
        dropout=dropout,
        return_sequences=return_sequences,
        return_state=return_state,
        go_backwards=go_backwards,
        stateful=stateful,
        **kwargs)
    if bidirectional:
        self._rnn = layers.Bidirectional(self._rnn, merge_mode='concat')
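# Context note (an addition, not from the original): in TF 2.x,
# keras.layers.LSTM dispatches to the fused cuDNN kernel only when
# activation='tanh', recurrent_activation='sigmoid', recurrent_dropout=0,
# unroll=False and use_bias=True (plus constraints on masking), which is what
# the constructor above pins down or passes through with compatible defaults.
# Hedged usage sketch; LSTM here refers to the wrapper class defined above.
rnn = LSTM(units=128, return_sequences=True, bidirectional=True)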
def __init__(self,
             units,
             activation='tanh',
             use_bias=True,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             bias_initializer='zeros',
             dropout=0.,
             return_sequences=False,
             return_state=False,
             go_backwards=False,
             stateful=False,
             num_layers=1,
             bidirectional=False,
             **kwargs):
    super(SimpleRNN, self).__init__(**kwargs)
    assert num_layers == 1, "Only support single layer for CuDNN RNN in keras"
    self._rnn = layers.SimpleRNN(units=units,
                                 activation=activation,
                                 use_bias=use_bias,
                                 kernel_initializer=kernel_initializer,
                                 recurrent_initializer=recurrent_initializer,
                                 bias_initializer=bias_initializer,
                                 dropout=dropout,
                                 recurrent_dropout=0.,
                                 return_sequences=return_sequences,
                                 return_state=return_state,
                                 go_backwards=go_backwards,
                                 stateful=stateful,
                                 unroll=False)
    if bidirectional:
        self._rnn = layers.Bidirectional(self._rnn, merge_mode='concat')
# In[99]:

model.fit(X_train, Y_train,
          validation_data=(X_test, Y_test),
          callbacks=[es],
          epochs=50,
          verbose=1)

# # Bi-LSTM

# In[103]:

# X_train is expected to be 3-D here: (samples, timesteps, 1).
# input_shape belongs on the Bidirectional wrapper, the model's first layer.
model = Sequential(name="Bi-LSTM")
model.add(layers.Bidirectional(layers.LSTM(64, activation="relu"),
                               input_shape=(X_train.shape[1], 1)))
model.add(layers.Dense(6, activation="softmax"))

# In[104]:

model.compile(optimizer=optimizer,
              loss="categorical_crossentropy",
              metrics=['accuracy'])

# In[106]:

model.fit(X_train, Y_train,
          validation_data=(X_test, Y_test),
          callbacks=[es],
          epochs=50)