def __init__(
    self,
    base_model: tf.keras.Model,
    units: int,
    chain_initializer: types.Initializer = "orthogonal",
    use_boundary: bool = True,
    boundary_initializer: types.Initializer = "zeros",
    use_kernel: bool = True,
    **kwargs,
):
    """Store ``base_model`` and create the CRF layer that sits on top of it.

    Args:
        base_model: Keras model kept on ``self.base_model``.
        units: Forwarded to ``CRF(units=...)``.
        chain_initializer: Forwarded to ``CRF(chain_initializer=...)``.
        use_boundary: Forwarded to ``CRF(use_boundary=...)``.
        boundary_initializer: Forwarded to ``CRF(boundary_initializer=...)``.
        use_kernel: Forwarded to ``CRF(use_kernel=...)``.
        **kwargs: Any extra keyword arguments, forwarded to ``CRF`` untouched.
    """
    super().__init__()

    # Imported here rather than at module level to break the import cycle:
    # tfa.layers.CRF -> tfa.text.__init__ -> tfa.text.crf_wrapper -> tfa.layers.CRF
    from tensorflow_addons.layers.crf import CRF  # noqa

    crf_options = {
        "units": units,
        "chain_initializer": chain_initializer,
        "use_boundary": use_boundary,
        "boundary_initializer": boundary_initializer,
        "use_kernel": use_kernel,
        **kwargs,
    }

    # The CRF layer is assigned before the base model, as in the original,
    # so the order in which the attributes are set does not change.
    self.crf_layer = CRF(**crf_options)
    self.base_model = base_model
def get_test_data_extended():
    """Build a fixed CRF fixture with hand-written logits and weights.

    Returns:
        A tuple ``(logits, tags, transitions, boundary_values, crf_layer)``:
        a ``2 x 3 x 5`` logits array, a ``2 x 3`` tag array, a ``5 x 5``
        transition matrix, a length-5 vector of ones, and a ``CRF`` layer
        (kernel transform disabled) whose chain and boundary initializers
        are constants taken from those arrays.
    """
    num_tags = 5

    tags = np.array([[2, 3, 4], [3, 2, 2]])
    logits = np.array(
        [
            [
                [0, 0, 0.5, 0.5, 0.2],
                [0, 0, 0.3, 0.3, 0.1],
                [0, 0, 0.9, 10, 1],
            ],
            [
                [0, 0, 0.2, 0.5, 0.2],
                [0, 0, 3, 0.3, 0.1],
                [0, 0, 0.9, 1, 1],
            ],
        ]
    )

    transitions = np.array(
        [
            [0.1, 0.2, 0.3, 0.4, 0.5],
            [0.8, 0.3, 0.1, 0.7, 0.9],
            [-0.3, 2.1, -5.6, 3.4, 4.0],
            [0.2, 0.4, 0.6, -0.3, -0.4],
            [1.0, 1.0, 1.0, 1.0, 1.0],
        ]
    )
    boundary_values = np.ones((num_tags,))

    chain_init = tf.keras.initializers.Constant(transitions)
    boundary_init = tf.keras.initializers.Constant(boundary_values)

    crf_layer = CRF(
        units=num_tags,
        # no kernel transform: the layer receives the logits as given
        use_kernel=False,
        chain_initializer=chain_init,
        use_boundary=True,
        boundary_initializer=boundary_init,
        name="crf_layer",
    )

    return logits, tags, transitions, boundary_values, crf_layer
def __init__(self, vocab_size, embedding_size, hidden_size, tag_size, *args, **kwargs):
    """Create the embedding, bidirectional LSTM, dense and CRF sub-layers.

    Args:
        vocab_size: Input dimension of the embedding layer.
        embedding_size: Output dimension of the embedding layer.
        hidden_size: Number of units of the LSTM wrapped by ``Bidirectional``.
        tag_size: Number of units of the dense layer; also passed to ``CRF``.
        *args: Forwarded to the parent constructor.
        **kwargs: Forwarded to the parent constructor.
    """
    super().__init__(*args, **kwargs)

    layers = tf.keras.layers

    self.embedding = layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedding_size,
    )

    # The LSTM returns the full sequence so every timestep gets a score.
    sequence_lstm = layers.LSTM(units=hidden_size, return_sequences=True)
    self.bi_lstm = layers.Bidirectional(sequence_lstm, merge_mode="concat")

    self.dense = layers.Dense(units=tag_size)
    self.crf = CRF(tag_size)
def __init__(self, words_count, labels_count, max_length):
    """Create the embedding, BiLSTM, time-distributed dense and CRF layers.

    Args:
        words_count: Input dimension of the embedding layer.
        labels_count: Value passed to ``CRF``.
        max_length: Passed to the embedding layer as ``input_length``.
    """
    super(BiLSTMCRF, self).__init__()

    # mask_zero=True is set on the embedding, so index 0 is treated as padding.
    self.embedding = Embedding(
        input_dim=words_count,
        output_dim=20,
        input_length=max_length,
        mask_zero=True,
    )

    recurrent_cell = LSTM(units=50, recurrent_dropout=0.1, return_sequences=True)
    self.lstm = Bidirectional(recurrent_cell)

    per_step_dense = Dense(units=50, activation="relu")
    self.dense = TimeDistributed(per_step_dense)

    self.crf = CRF(labels_count)
def get_some_model(x_np, y_np, sanity_check=True):
    """Build a compiled ``ModelWithCRFLoss`` around a single 5-unit CRF layer.

    Args:
        x_np: Input array; its shape without the first axis defines the
            Keras input shape.
        y_np: Target array, used only when ``sanity_check`` is true.
        sanity_check: When true, run ``fit``, ``evaluate`` and ``predict``
            once on the given data before returning.

    Returns:
        The compiled model.
    """
    inputs = tf.keras.layers.Input(shape=x_np.shape[1:])
    crf = CRF(5, name="L")
    wrapped = ModelWithCRFLoss(tf.keras.Model(inputs, crf(inputs)))
    wrapped.compile("adam")

    if not sanity_check:
        return wrapped

    # Smoke-run the three main entry points once.
    wrapped.fit(x=x_np, y=y_np)
    wrapped.evaluate(x_np, y_np)
    wrapped.predict(x_np)
    return wrapped
def __init__(self, params: BaseParams, problem_name: str):
    """Create the dense, dropout, optional CRF layer and the accuracy metric.

    Args:
        params: Parameter object; this constructor reads ``num_classes``
            (indexed by problem name), ``dropout_keep_prob`` and ``crf``.
        problem_name: Name of the problem; also used as the layer name and
            as the prefix of the metric name.
    """
    super(SequenceLabel, self).__init__(name=problem_name)
    self.params = params
    self.problem_name = problem_name

    class_count = params.num_classes[problem_name]
    metric_name = '{}_acc'.format(problem_name)

    self.dense = tf.keras.layers.Dense(class_count, activation=None)
    # Keras takes a drop rate, while params stores a keep probability.
    self.dropout = tf.keras.layers.Dropout(1 - params.dropout_keep_prob)

    if params.crf:
        self.crf = CRF(class_count)
        metric_cls = tf.keras.metrics.Accuracy
    else:
        metric_cls = tf.keras.metrics.SparseCategoricalAccuracy

    self.metric_fn = metric_cls(name=metric_name)
def test_unmasked_viterbi_decode():
    """Check the decoded tags of an unmasked CRF with all-ones weights.

    The layer's first output must equal the expected tags from
    ``get_test_data`` and have dtype ``int32``.
    """
    x_np, y_np = get_test_data()

    constant = tf.keras.initializers.Constant
    chain_weights = np.ones([5, 5])
    boundary_weights = np.ones(5)

    layer = CRF(
        units=5,
        # no kernel transform: inputs are used as given
        use_kernel=False,
        chain_initializer=constant(chain_weights),
        use_boundary=True,
        boundary_initializer=constant(boundary_weights),
    )

    # The layer yields four outputs; only the first one is checked here.
    decoded, _, _, _ = layer(x_np)
    decoded = decoded.numpy()

    np.testing.assert_equal(decoded, y_np)
    assert decoded.dtype == np.int32
def test_mask_left_padding():
    """Check that a left-padded mask makes the model raise ``NotImplementedError``."""
    x_np, _ = get_test_data()

    # The first sequence is masked at its first timestep, i.e. padded on the left.
    left_padded_mask = tf.constant(np.array([[0, 1, 1], [1, 1, 1]]))

    inputs = tf.keras.layers.Input(shape=x_np.shape[1:])
    crf_outputs = CRF(5)(inputs, mask=left_padded_mask)
    model = ModelWithCRFLoss(tf.keras.Model(inputs, crf_outputs))

    # The mask values can only be inspected when running eagerly.
    # That is effectively a debug mode; outside of it the check
    # would just waste computation.
    model.compile("adam", run_eagerly=True)

    with pytest.raises(NotImplementedError) as context:
        model(x_np).numpy()

    assert "CRF layer do not support left padding" in str(context.value)
def test_mask_right_padding():
    """Check that a right-padded mask trains and predicts without error.

    One call to ``fit`` must change the model weights, and ``predict``
    must run afterwards.
    """
    x_np, y_np = get_test_data()

    # The second sequence is masked at its last timestep, i.e. padded on the right.
    right_padded_mask = tf.constant(np.array([[1, 1, 1], [1, 1, 0]]))

    inputs = tf.keras.layers.Input(shape=x_np.shape[1:])
    crf_outputs = CRF(5)(inputs, mask=right_padded_mask)
    model = ModelWithCRFLoss(tf.keras.Model(inputs, crf_outputs))

    # check shape inference
    model.compile("adam")

    weights_before = model.get_weights()
    model.fit(x_np, y_np)
    weights_after = model.get_weights()

    # Training must have moved the weights, so the equality helper
    # is expected to fail.
    with pytest.raises(AssertionError):
        assert_all_equal(weights_before, weights_after)

    model.predict(x_np)
class CRFModelWrapper(tf.keras.Model):
    """Keras model that feeds a base model's output into a CRF layer.

    The wrapper owns a ``CRF`` layer, computes the CRF negative
    log-likelihood as the training loss in custom ``train_step`` /
    ``test_step`` implementations, and returns the decoded sequence from
    ``call`` by default.
    """

    def __init__(
        self,
        base_model: tf.keras.Model,
        units: int,
        chain_initializer: types.Initializer = "orthogonal",
        use_boundary: bool = True,
        boundary_initializer: types.Initializer = "zeros",
        use_kernel: bool = True,
        **kwargs,
    ):
        """Store ``base_model`` and create the CRF layer.

        Args:
            base_model: Model whose output is passed to the CRF layer.
            units: Forwarded to ``CRF(units=...)``.
            chain_initializer: Forwarded to ``CRF``.
            use_boundary: Forwarded to ``CRF``.
            boundary_initializer: Forwarded to ``CRF``.
            use_kernel: Forwarded to ``CRF``.
            **kwargs: Extra keyword arguments forwarded to ``CRF``.
        """
        super().__init__()

        # lazy import to solve circular import issue:
        # tfa.layers.CRF -> tfa.text.__init__ -> tfa.text.crf_wrapper -> tfa.layers.CRF
        from tensorflow_addons.layers.crf import CRF  # noqa

        self.crf_layer = CRF(
            units=units,
            chain_initializer=chain_initializer,
            use_boundary=use_boundary,
            boundary_initializer=boundary_initializer,
            use_kernel=use_kernel,
            **kwargs,
        )

        self.base_model = base_model

    def unpack_training_data(self, data):
        """Split a training batch into ``(x, y, sample_weight)``.

        Args:
            data: A 3-element ``(x, y, sample_weight)`` sequence, or a
                2-element ``(x, y)`` sequence.

        Returns:
            ``(x, y, sample_weight)``; ``sample_weight`` is ``None`` when
            ``data`` does not have exactly three elements.
        """
        # override me, if this does not suit your task
        if len(data) == 3:
            x, y, sample_weight = data
        else:
            x, y = data
            sample_weight = None

        return x, y, sample_weight

    def call(self, inputs, training=None, mask=None, return_crf_internal=False):
        """Run the base model, then the CRF layer.

        Args:
            inputs: Passed to the base model.
            training: Passed to the base model.
            mask: Passed to the base model.
            return_crf_internal: When true, also return the CRF internals.

        Returns:
            The decoded sequence by default. When ``return_crf_internal`` is
            true, the tuple
            ``((potentials, sequence_length, kernel), decode_sequence)``.
        """
        base_model_outputs = self.base_model(inputs, training, mask)

        # change next line, if your model has more outputs
        crf_input = base_model_outputs

        decode_sequence, potentials, sequence_length, kernel = self.crf_layer(
            crf_input
        )

        # change next line, if your base model has more outputs
        # Always keep `(potentials, sequence_length, kernel), decode_sequence, `
        # as first two outputs of model.
        # current `self.train_step()` expected such settings
        outputs = (potentials, sequence_length, kernel), decode_sequence

        if return_crf_internal:
            return outputs

        # outputs[0] is the crf internal, skip it
        output_without_crf_internal = outputs[1:]

        # it is nicer to return a tensor instead of a one-tensor list
        if len(output_without_crf_internal) == 1:
            return output_without_crf_internal[0]
        return output_without_crf_internal

    def compute_crf_loss(
        self, potentials, sequence_length, kernel, y, sample_weight=None
    ):
        """Return the mean negative CRF log-likelihood.

        Args:
            potentials: Passed to ``crf_log_likelihood`` as first argument.
            sequence_length: Passed to ``crf_log_likelihood``.
            kernel: Passed to ``crf_log_likelihood`` as last argument.
            y: Target tags, passed to ``crf_log_likelihood``.
            sample_weight: Optional weights multiplied into the negated
                likelihood before it is averaged.

        Returns:
            ``tf.reduce_mean`` of the (optionally weighted) negated
            likelihood.
        """
        crf_likelihood, _ = crf_log_likelihood(
            potentials, y, sequence_length, kernel
        )
        # convert likelihood to loss
        flat_crf_loss = -1 * crf_likelihood
        if sample_weight is not None:
            flat_crf_loss = flat_crf_loss * sample_weight
        crf_loss = tf.reduce_mean(flat_crf_loss)

        return crf_loss

    def train_step(self, data):
        """Run one optimization step on a batch.

        Returns:
            A dict mapping each metric name to its current result, plus
            ``"loss"`` (CRF loss plus the sum of ``self.losses``) and
            ``"crf_loss"``.
        """
        x, y, sample_weight = self.unpack_training_data(data)
        with tf.GradientTape() as tape:
            (potentials, sequence_length, kernel), decoded_sequence, *_ = self(
                x, training=True, return_crf_internal=True
            )
            crf_loss = self.compute_crf_loss(
                potentials, sequence_length, kernel, y, sample_weight
            )
            loss = crf_loss + tf.reduce_sum(self.losses)
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, decoded_sequence)
        # Return a dict mapping metric names to current value
        orig_results = {m.name: m.result() for m in self.metrics}
        crf_results = {"loss": loss, "crf_loss": crf_loss}
        return {**orig_results, **crf_results}

    def test_step(self, data):
        """Evaluate one batch without updating weights.

        Returns:
            A dict mapping each metric name to its current result, plus
            ``"loss"`` (CRF loss plus the sum of ``self.losses``) and
            ``"crf_loss"``.
        """
        x, y, sample_weight = self.unpack_training_data(data)
        (potentials, sequence_length, kernel), decode_sequence, *_ = self(
            x, training=False, return_crf_internal=True
        )
        crf_loss = self.compute_crf_loss(
            potentials, sequence_length, kernel, y, sample_weight
        )
        loss = crf_loss + tf.reduce_sum(self.losses)
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, decode_sequence)
        # Return a dict mapping metric names to current value
        results = {m.name: m.result() for m in self.metrics}
        results.update({"loss": loss, "crf_loss": crf_loss})  # append loss
        return results

    def get_config(self):
        """Return the CRF layer config plus the base model config.

        The base model config is stored under the ``"base_model"`` key; every
        other key comes from ``self.crf_layer.get_config()``.
        """
        base_model_config = self.base_model.get_config()
        crf_config = self.crf_layer.get_config()

        return {"base_model": base_model_config, **crf_config}

    @classmethod
    def from_config(cls, config):
        """Rebuild a wrapper from a dict produced by ``get_config``.

        Args:
            config: Dict with a ``"base_model"`` entry; the remaining entries
                are passed to the constructor as keyword arguments. The dict
                is not modified.

        Returns:
            A new instance of ``cls``.

        Raises:
            KeyError: If ``config`` has no ``"base_model"`` entry.
        """
        # Pop from a shallow copy so the caller's dict keeps its "base_model"
        # entry and can be reused for another from_config call.
        remaining = dict(config)
        base_model_config = remaining.pop("base_model")
        base_model = tf.keras.Model.from_config(base_model_config)

        return cls(base_model=base_model, **remaining)