def testWordPermutation(self, k):
    """Checks the displacement bound of WordPermutation.

    With k == 0 the permutation layer must be the identity; otherwise every
    token may move at most k-1 positions away from its original index. The
    input tokens are their own indices ("0".."13") so the displacement of a
    token is simply abs(int(token) - new_position).
    """
    words = tf.constant("0 1 2 3 4 5 6 7 8 9 10 11 12 13".split())
    shuffled = noise.WordPermutation(k)(words)
    words, shuffled = self.evaluate([words, shuffled])
    if k == 0:
        # Identity case: output must match the input exactly.
        self.assertAllEqual(shuffled, words)
        return
    for position, token in enumerate(shuffled.tolist()):
        # Each token encodes its original index; check it moved less than k.
        self.assertLess(abs(int(token) - position), k)
def testWordNoisingKeepShape(self):
    """Checks that a full noising pipeline with keep_shape=True preserves shape.

    NOTE(review): this method was named testWordNoising, the same name as a
    later method in this class, so this definition was silently shadowed and
    never ran. Renamed so the test is actually discovered and executed.
    """
    # Two sequences padded to the same width; "■" marks subword joiners.
    tokens = tf.constant([["a■", "b", "c■", "d", "■e"], ["a", "b", "c", "", ""]])
    lengths = tf.constant([5, 3])
    # Chain all three noise types to exercise the full pipeline.
    noiser = noise.WordNoiser()
    noiser.add(noise.WordDropout(0.1))
    noiser.add(noise.WordReplacement(0.1))
    noiser.add(noise.WordPermutation(3))
    # noisy_lengths is part of the noiser's return contract but unused here:
    # only the shape invariant is under test.
    noisy_tokens, noisy_lengths = noiser(tokens, sequence_length=lengths, keep_shape=True)
    tokens, noisy_tokens = self.evaluate([tokens, noisy_tokens])
    self.assertAllEqual(noisy_tokens.shape, tokens.shape)
def testWordNoising(self, as_function, tokens, lengths):
    """Checks that noising with keep_shape=True preserves the input shape.

    The extra parameters suggest this method is driven by a parameterized
    decorator defined outside this view — TODO confirm against the class
    definition. ``as_function`` additionally exercises the noiser wrapped
    in ``tf.function``.
    """
    token_tensor = tf.constant(tokens)
    length_tensor = None
    if lengths is not None:
        length_tensor = tf.constant(lengths, dtype=tf.int32)
    # Build the same three-stage pipeline used across these tests.
    noiser = noise.WordNoiser()
    for layer in (
        noise.WordDropout(0.1),
        noise.WordReplacement(0.1),
        noise.WordPermutation(3),
    ):
        noiser.add(layer)
    # Optionally trace the noiser into a graph function.
    apply_noise = tf.function(noiser) if as_function else noiser
    noisy_tokens, _ = apply_noise(
        token_tensor, sequence_length=length_tensor, keep_shape=True
    )
    token_values, noisy_values = self.evaluate([token_tensor, noisy_tokens])
    self.assertAllEqual(noisy_values.shape, token_values.shape)