def test_make_ngram_labels_additive_smoothing(self):
    """Checks `make_ngram_labels` when additive smoothing mass is supplied.

    With `additive_smoothing_mass=1.0` spread over all
    `kp_max_length * long_max_length = 20` cells, every cell gains
    1.0 / 20 = 0.05 before the final normalization.
    """
    label_start_idx = tf.constant([
        [1, -1, -1],
        [2, 3, 0],
    ])
    label_phrase_len = tf.constant([
        [3, -1, -1],
        [2, 1, 2],
    ])
    long_max_length = 4
    kp_max_length = 5
    additive_smoothing_mass = 1.0
    # Each of the three labels in example 2 carries 1/3 of the label mass,
    # on top of the uniform 0.05 smoothing added to every cell.
    smoothed_third = (1 / 3) + 0.05
    # Rows are n-gram lengths (1..kp_max_length); columns are start positions.
    per_ngram_grid = [
        [
            [0.05, 0.05, 0.05, 0.05],  # 1-grams
            [0.05, 0.05, 0.05, 0.05],  # 2-grams
            [0.05, 1.05, 0.05, 0.05],  # 3-grams
            [0.05, 0.05, 0.05, 0.05],  # 4-grams
            [0.05, 0.05, 0.05, 0.05],  # 5-grams
        ],
        [
            [0.05, 0.05, 0.05, smoothed_third],  # 1-grams
            [smoothed_third, 0.05, smoothed_third, 0.05],  # 2-grams
            [0.05, 0.05, 0.05, 0.05],  # 3-grams
            [0.05, 0.05, 0.05, 0.05],  # 4-grams
            [0.05, 0.05, 0.05, 0.05],  # 5-grams
        ],
    ]
    num_examples = len(per_ngram_grid)
    # Flatten to [batch, kp_max_length * long_max_length] and normalize so
    # each example's distribution sums to 1.
    flat_unnormalized = tf.reshape(
        per_ngram_grid, [num_examples, kp_max_length * long_max_length])
    normalizer = tf.reduce_sum(flat_unnormalized, axis=-1, keepdims=True)
    expected = flat_unnormalized / normalizer
    actual = run_finetuning_lib.make_ngram_labels(
        label_start_idx=label_start_idx,
        label_phrase_len=label_phrase_len,
        long_max_length=long_max_length,
        kp_max_length=kp_max_length,
        additive_smoothing_mass=additive_smoothing_mass)
    self.assertAllClose(expected, actual)
def test_make_ngram_labels(self):
    """Checks `make_ngram_labels` with default (no additive) smoothing.

    Example 1 has a single gold phrase (start 1, length 3), so all mass
    lands on one cell. Example 2 has three gold phrases, each receiving
    an equal 1/3 share of the label mass.
    """
    label_start_idx = tf.constant([
        [1, -1, -1],
        [2, 3, 0],
    ])
    label_phrase_len = tf.constant([
        [3, -1, -1],
        [2, 1, 2],
    ])
    long_max_length = 4
    kp_max_length = 5
    # Rows are n-gram lengths (1..kp_max_length); columns are start positions.
    per_ngram_grid = [
        [
            [0, 0, 0, 0],  # 1-grams
            [0, 0, 0, 0],  # 2-grams
            [0, 1, 0, 0],  # 3-grams
            [0, 0, 0, 0],  # 4-grams
            [0, 0, 0, 0],  # 5-grams
        ],
        [
            [0, 0, 0, 1 / 3],  # 1-grams
            [1 / 3, 0, 1 / 3, 0],  # 2-grams
            [0, 0, 0, 0],  # 3-grams
            [0, 0, 0, 0],  # 4-grams
            [0, 0, 0, 0],  # 5-grams
        ],
    ]
    num_examples = len(per_ngram_grid)
    # Flatten to the [batch, kp_max_length * long_max_length] label shape.
    expected = tf.reshape(
        per_ngram_grid, [num_examples, kp_max_length * long_max_length])
    actual = run_finetuning_lib.make_ngram_labels(
        label_start_idx=label_start_idx,
        label_phrase_len=label_phrase_len,
        long_max_length=long_max_length,
        kp_max_length=kp_max_length)
    self.assertAllClose(expected, actual)