def loss(self, input_batch, name='wavenet'):
    '''Build the WaveNet graph and return its mean next-sample loss.

    The input is mu-law encoded, passed through `self._one_hot` and
    `self._create_network`, and the network output at each step is scored
    with softmax cross-entropy against the encoded input one step ahead.
    All ops are created under `tf.name_scope(name)`.

    Args:
        input_batch: Raw audio batch handed to `mu_law_encode`.
        name: Name scope wrapping every op created here.

    Returns:
        Scalar tensor: the cross-entropy averaged over all rows. The same
        value is also registered as the scalar summary 'loss'.
    '''
    with tf.name_scope(name):
        num_channels = self.quantization_channels
        quantized = mu_law_encode(input_batch, num_channels)
        encoded = self._one_hot(quantized)
        raw_output = self._create_network(encoded)

        with tf.name_scope('loss'):
            # Targets are the input advanced by one step: drop the first
            # sample along axis 1, then pad one step at the end so the
            # length along that axis matches `encoded` again.
            remaining_steps = tf.shape(encoded)[1] - 1
            targets = tf.slice(encoded, [0, 1, 0], [-1, remaining_steps, -1])
            targets = tf.pad(targets, [[0, 0], [0, 1], [0, 0]])

            # Collapse both tensors to rows of `num_channels` entries so
            # that every row is scored independently.
            flat_logits = tf.reshape(raw_output, [-1, num_channels])
            flat_targets = tf.reshape(targets, [-1, num_channels])

            # NOTE(review): positional (logits, labels) order; newer
            # TensorFlow releases only accept these as keyword arguments --
            # confirm before upgrading.
            per_row_loss = tf.nn.softmax_cross_entropy_with_logits(
                flat_logits, flat_targets)
            mean_loss = tf.reduce_mean(per_row_loss)

            tf.scalar_summary('loss', mean_loss)

            return mean_loss
def testEncodeDecode(self):
    '''Check mu-law encode/decode round trips on a linear ramp.

    The first round trip only has to be approximately lossless; a second
    round trip of the already-decoded signal must reproduce it.
    '''
    num_levels = 256
    original = np.linspace(-1, 1, 1000).astype(np.float32)

    # First pass: the decoded signal should be roughly what went in.
    with self.test_session() as session:
        decoded_op = mu_law_decode(
            mu_law_encode(original, num_levels), num_levels)
        first_pass = session.run(decoded_op)

    self.assertAllClose(original, first_pass, rtol=1e-1, atol=0.05)

    # Second pass: re-encoding a decoded signal must leave it unchanged.
    with self.test_session() as session:
        decoded_op = mu_law_decode(
            mu_law_encode(first_pass, num_levels), num_levels)
        second_pass = session.run(decoded_op)

    self.assertAllClose(first_pass, second_pass)
def create_seed(filename,
                sample_rate,
                quantization_channels,
                window_size=WINDOW):
    '''Load an audio file and return a mu-law encoded seed of bounded length.

    Args:
        filename: Path of the audio file, passed to `librosa.load`.
        sample_rate: Sample rate requested from `librosa.load`.
        quantization_channels: Number of levels handed to `mu_law_encode`.
        window_size: Maximum number of leading samples kept in the seed.

    Returns:
        The encoded signal cut to its first
        min(number of samples, window_size) entries.
    '''
    audio, _ = librosa.load(filename, sr=sample_rate, mono=True)
    quantized = mu_law_encode(audio, quantization_channels)

    # Clamp the cut point to the signal length: short files are returned
    # whole, longer ones are truncated to `window_size` samples. A cut index
    # of `size + window_size - 1` is never below the signal length for
    # window_size >= 1 and therefore never truncates anything.
    cut_index = tf.minimum(tf.size(quantized), tf.constant(window_size))

    return quantized[:cut_index]
def testEncodeNegativeChannelSize(self):
    '''Expect a TypeError when encoding with a negative channel count.'''
    np.random.seed(1944)  # For repeatability of test.
    channels = -256
    number_of_samples = 1024
    x = np.zeros(number_of_samples).astype(np.float32)

    with self.test_session() as sess:
        # The encode call has to run *inside* the assertRaises context.
        # Passing its already-evaluated result as the second argument makes
        # assertRaises try to call that result, and calling a non-callable
        # raises TypeError by itself -- so the check could succeed without
        # mu_law_encode ever raising.
        # NOTE(review): if mu_law_encode does not actually reject negative
        # channel counts, this test now fails; that is the signal the
        # previous form was hiding.
        with self.assertRaises(TypeError):
            sess.run(mu_law_encode(x, channels))
def testEncodeUniformRandomNoise(self):
    '''Compare mu_law_encode with the manual encoder on uniform noise.'''
    np.random.seed(42)  # Fixed seed so the noise is reproducible.
    num_levels = 256
    sample_count = 2048
    noise = np.random.uniform(-1, 1, sample_count).astype(np.float32)

    expected = manual_mu_law_encode(noise, num_levels)

    with self.test_session() as session:
        actual = session.run(mu_law_encode(noise, num_levels))

    self.assertAllEqual(expected, actual)
def testEncodeZeros(self):
    '''Compare mu_law_encode with the manual encoder on an all-zero signal.'''
    np.random.seed(1944)  # Seed is set although no random draw follows.
    num_levels = 256
    sample_count = 1024
    silence = np.zeros(sample_count, dtype=np.float32)

    expected = manual_mu_law_encode(silence, num_levels)

    with self.test_session() as session:
        actual = session.run(mu_law_encode(silence, num_levels))

    self.assertAllEqual(expected, actual)
def testEncodePrecomputed(self):
    '''Check mu_law_encode against a fixed table of expected codes.'''
    num_levels = 256
    # Number of entries in each fixture array below (informational only).
    number_of_samples = 10

    samples = np.array([
        -1.0, 1.0, 0.6, -0.25, 0.01,
        0.33, -0.9999, 0.42, 0.1, -0.45,
    ]).astype(np.float32)
    expected_codes = np.array([
        0, 255, 243, 32, 157,
        230, 0, 235, 203, 18,
    ]).astype(np.int32)

    with self.test_session() as session:
        actual_codes = session.run(mu_law_encode(samples, num_levels))

    self.assertAllEqual(expected_codes, actual_codes)
def testEncodeRamp(self):
    '''Compare mu_law_encode with the manual encoder on a linear ramp.'''
    np.random.seed(1944)  # Seed is set although no random draw follows.
    num_levels = 256
    sample_count = 1024

    # Spacing that spreads `sample_count` points over [-1, 1).
    step_size = 2.0 / sample_count
    ramp = np.arange(-1.0, 1.0, step_size).astype(np.float32)

    expected = manual_mu_law_encode(ramp, num_levels)

    with self.test_session() as session:
        actual = session.run(mu_law_encode(ramp, num_levels))

    self.assertAllEqual(expected, actual)
def testEncodeIsSurjective(self):
    '''Check that a dense sweep of [-1, 1] yields every one of the codes.

    10000 evenly spaced inputs are encoded with 123 channels; the number
    of distinct codes produced must equal the channel count.
    '''
    num_levels = 123
    sweep = np.linspace(-1, 1, 10000).astype(np.float32)

    with self.test_session() as session:
        codes = session.run(mu_law_encode(sweep, num_levels))

    distinct_codes = np.unique(codes)
    self.assertEqual(len(distinct_codes), num_levels)