def test_crf_log_likelihood(dtype):
    """Check that CRF probabilities over all tag sequences sum to one.

    Every possible labelling of the first ``sequence_lengths`` positions is
    enumerated, padded with tag 0 up to ``num_words``, and scored with
    ``text.crf_log_likelihood`` on a batch of one. The log-sum-exp of all
    scores must be 0 (i.e. total probability 1) within dtype-dependent
    tolerances.

    Args:
        dtype: NumPy dtype used for the emission scores and the transition
            matrix (presumably supplied by a test parametrization -- confirm
            against the decorator, which is outside this block).
    """
    emissions = np.array(
        [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype
    )
    transitions = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype)
    seq_len = np.array(3, dtype=np.int32)
    num_words, num_tags = emissions.shape

    # Positions past the true sequence length are filled with tag 0.
    padding = [0] * (num_words - seq_len)
    labelings = [
        list(prefix) + padding
        for prefix in itertools.product(range(num_tags), repeat=seq_len)
    ]

    def score(labeling):
        """Return the log-likelihood of one labelling as a batch of one."""
        log_likelihood, _ = text.crf_log_likelihood(
            inputs=tf.expand_dims(emissions, 0),
            tag_indices=tf.expand_dims(labeling, 0),
            sequence_lengths=tf.expand_dims(seq_len, 0),
            transition_params=tf.constant(transitions),
        )
        return log_likelihood

    # Make sure all probabilities sum to 1 (log of the sum is 0).
    total_log_likelihood = tf.reduce_logsumexp(
        [score(labeling) for labeling in labelings]
    )
    test_utils.assert_allclose_according_to_type(
        total_log_likelihood,
        0.0,
        rtol=1e-6,
        atol=1e-6,
        half_rtol=2e-3,
        half_atol=2e-3,
    )

    # Smoke check: omit `transition_params` and reuse the last enumerated
    # labelling. The call is unguarded, so any exception fails the test.
    text.crf_log_likelihood(
        inputs=tf.expand_dims(emissions, 0),
        tag_indices=tf.expand_dims(labelings[-1], 0),
        sequence_lengths=tf.expand_dims(seq_len, 0),
    )
def testCrfLogLikelihood(self):
    """Check that CRF probabilities over all tag sequences sum to one.

    Scores every labelling of the unpadded prefix (padded with tag 0 to the
    full length) with ``text.crf_log_likelihood`` and asserts that the
    log-sum-exp of the scores evaluates to 0. Afterwards the function is
    called once without ``transition_params``; that call only has to
    complete without raising.
    """
    inputs = np.array(
        [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
    transition_params = np.array(
        [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
    sequence_lengths = np.array(3, dtype=np.int32)
    # TODO: https://github.com/PyCQA/pylint/issues/3139
    # pylint: disable=E1136
    num_words = inputs.shape[0]
    num_tags = inputs.shape[1]
    # pylint: enable=E1136

    # Tag 0 fills the positions beyond the true sequence length.
    tail = [0] * (num_words - sequence_lengths)
    candidates = [
        list(prefix) + tail
        for prefix in itertools.product(
            range(num_tags), repeat=sequence_lengths)
    ]

    def batch_of_one_log_likelihood(candidate):
        """Score a single padded labelling wrapped in a leading batch axis."""
        value, _ = text.crf_log_likelihood(
            inputs=tf.expand_dims(inputs, 0),
            tag_indices=tf.expand_dims(candidate, 0),
            sequence_lengths=tf.expand_dims(sequence_lengths, 0),
            transition_params=tf.constant(transition_params))
        return value

    # Make sure all probabilities sum to 1.
    total_log_likelihood = tf.reduce_logsumexp(
        [batch_of_one_log_likelihood(candidate) for candidate in candidates])
    self.assertAllClose(self.evaluate(total_log_likelihood), 0.0)

    # Smoke check without `transition_params`, reusing the last enumerated
    # labelling; an exception here fails the test.
    text.crf_log_likelihood(
        inputs=tf.expand_dims(inputs, 0),
        tag_indices=tf.expand_dims(candidates[-1], 0),
        sequence_lengths=tf.expand_dims(sequence_lengths, 0))
def testCrfLogLikelihood(self):
    """Check that CRF probabilities over all tag sequences sum to one.

    Each labelling of the first ``seq_len`` positions is padded with tag 0,
    scored through ``text.crf_log_likelihood`` as a batch of one, and the
    log-sum-exp of all scores is asserted to be close to 0.
    """
    scores = np.array(
        [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
    transitions = np.array(
        [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
    seq_len = np.array(3, dtype=np.int32)
    num_words, num_tags = scores.shape

    def log_likelihood_of(prefix):
        """Pad `prefix` to the full length and return its log-likelihood."""
        padded = list(prefix)
        padded.extend([0] * (num_words - seq_len))
        value, _ = text.crf_log_likelihood(
            inputs=tf.expand_dims(scores, 0),
            tag_indices=tf.expand_dims(padded, 0),
            sequence_lengths=tf.expand_dims(seq_len, 0),
            transition_params=tf.constant(transitions))
        return value

    # Make sure all probabilities sum to 1.
    every_log_likelihood = [
        log_likelihood_of(prefix)
        for prefix in itertools.product(range(num_tags), repeat=seq_len)
    ]
    total = tf.reduce_logsumexp(every_log_likelihood)
    self.assertAllClose(self.evaluate(total), 0.0)
def call(self, inputs, training=False):
    """Decode the best-scoring tag sequence and, in training, record a loss.

    When `training` is truthy, the mean of the negated CRF log-likelihood is
    registered through `self.add_loss`.
    NOTE(review): no normalization by sequence length happens in this
    method.

    Args:
      inputs: A list with three tensors. The first tensor is
        [batch_size, max_seq_len, num_tags] tensor of logits. The second
        tensor is a [batch_size] vector of true sequence lengths. The third
        tensor is [batch_size, max_seq_len] tensor of expected ids (only
        used in training mode).
      training: Whether it runs in training mode.

    Returns:
      decode_tags: A [batch_size, max_seq_len] matrix, with dtype
        `tf.int32`. Contains the highest scoring tag indices.
    """
    logits, sequence_length, labels = inputs

    best_path, _ = tfa_text.crf_decode(
        logits, self._transition_matrix, sequence_length)
    decode_tags = tf.cast(best_path, tf.int32)

    if training:
        # Right-padding is labelled -1, which would disturb the loss
        # computation; clamp those entries to 0.
        clipped_labels = tf.maximum(labels, 0)
        log_likelihood, _ = tfa_text.crf_log_likelihood(
            logits, clipped_labels, sequence_length, self._transition_matrix)
        negative_log_likelihood = -log_likelihood
        self.add_loss(tf.reduce_mean(negative_log_likelihood))

    return decode_tags
def testDifferentDtype(self):
    """Call `crf_log_likelihood` with float32 inputs and int64 tags/lengths.

    There are no assertions; the test passes as long as the call returns a
    two-element result without raising.
    """
    batch_size, max_seq_len, num_tags = 16, 20, 5

    emissions = np.ones([batch_size, max_seq_len, num_tags], dtype=np.float32)
    tag_indices = tf.convert_to_tensor(
        np.ones([batch_size, max_seq_len], dtype=np.int64))
    # Every sequence spans the full 20 steps.
    lengths = np.full([batch_size], max_seq_len, dtype=np.int64)

    _log_likelihood, _ = text.crf_log_likelihood(
        inputs=emissions,
        tag_indices=tag_indices,
        sequence_lengths=lengths)
def crf_loss(y_true, y_pred):
    """Return the mean negative CRF log-likelihood of `y_true` given `y_pred`.

    Reads `dtype`, `sparse_target`, `sequence_lengths` and `transitions`
    from the enclosing `self`. As a side effect, `self.transitions` is
    overwritten with the transition parameters returned by
    `crf_log_likelihood`.
    """
    potentials = tf.convert_to_tensor(y_pred, dtype=self.dtype)

    # NOTE(review): argmax is applied when `sparse_target` is truthy --
    # confirm this is the intended polarity of the flag.
    if self.sparse_target:
        tag_indices = tf.cast(K.argmax(y_true), dtype=tf.int32)
    else:
        tag_indices = y_true

    log_likelihood, updated_transitions = crf_log_likelihood(
        potentials,
        tag_indices,
        self.sequence_lengths,
        transition_params=self.transitions,
    )
    self.transitions = updated_transitions

    return tf.reduce_mean(-log_likelihood)
def compute_crf_loss(self, potentials, sequence_length, kernel, y, sample_weight=None):
    """Return the mean negated CRF log-likelihood, optionally weighted.

    Args:
        potentials: Passed to `crf_log_likelihood` as its first argument.
        sequence_length: Passed as the third argument.
        kernel: Passed as the fourth argument (the transition parameters).
        y: Passed as the second argument (the target tags).
        sample_weight: Optional factor multiplied into the negated
            log-likelihood before averaging; skipped when `None`.

    Returns:
        The `tf.reduce_mean` of the (optionally weighted) negated
        log-likelihood.
    """
    log_likelihood, _ = crf_log_likelihood(potentials, y, sequence_length, kernel)

    # Likelihood is maximized, so its negation is the quantity to minimize.
    per_example_loss = -1 * log_likelihood
    if sample_weight is not None:
        per_example_loss = per_example_loss * sample_weight

    return tf.reduce_mean(per_example_loss)
def call(self, inputs: list[tf.Tensor], mask: tf.Tensor) -> tf.RaggedTensor:
    """Register the CRF loss and a tag-accuracy metric, then decode tags.

    Args:
        inputs: Two-element sequence unpacked as ``(emissions, tag_ids)``.
        mask: Mask cast to int32 here; its row sums are used as the
            sequence lengths and it gates which positions count toward
            the accuracy and the returned values.

    Returns:
        The decoded tag ids with masked-out positions removed, as produced
        by ``tf.ragged.boolean_mask``.
    """
    emissions, tag_ids = inputs

    int_mask = tf.cast(mask, tf.int32)
    sequence_lengths = tf.math.reduce_sum(int_mask, axis=1)

    # Loss: negated mean log-likelihood of the provided tags.
    log_likelihoods, _ = crf_log_likelihood(
        emissions, tag_ids, sequence_lengths, self.transition_weight
    )
    mean_log_likelihood = tf.math.reduce_mean(log_likelihoods)
    self.add_loss(tf.math.negative(mean_log_likelihood))

    decoded_tag_ids, _ = crf_decode(
        emissions, self.transition_weight, sequence_lengths
    )

    # Accuracy: matching positions inside the mask over all masked positions.
    matches = tf.cast(tf.equal(tag_ids, decoded_tag_ids), tf.int32)
    num_correct = tf.reduce_sum(matches * int_mask)
    num_positions = tf.reduce_sum(int_mask)
    self.add_metric(num_correct / num_positions, name="tag_accuracy")

    # The boolean mask is derived from the int32-cast mask, not the raw one.
    keep = tf.cast(int_mask, tf.bool)
    return tf.ragged.boolean_mask(decoded_tag_ids, keep)