Пример #1
0
    def test1(self):
        """Build the network on sine-wave input and print output shapes.

        Nothing is asserted here; the shapes and two rows of the softmax
        output are only printed for inspection.
        """
        wave_in, wave_out = make_sine_waves()
        wave_in_t = tf.convert_to_tensor(wave_in, dtype=tf.float32)
        wave_out_t = tf.convert_to_tensor(wave_out, dtype=tf.float32)

        # Quantize both signals to 256 levels, then one-hot encode them.
        quantized_in = mu_law_encode(wave_in_t, 256)
        quantized_out = mu_law_encode(wave_out_t, 256)
        net_input = self.net._one_hot(quantized_in)
        target = self.net._one_hot(quantized_out)

        # Target without its first entry along axis 1 (not used further).
        axis1_len = tf.shape(target)[1]
        shifted = tf.slice(target, [0, 1, 0], [-1, axis1_len - 1, -1])

        raw_output = self.net._create_network(net_input)
        n_channels = self.net.quantization_channels
        logits = tf.reshape(raw_output, [-1, n_channels])

        # Cast to float64 to avoid bug in TensorFlow
        softmax64 = tf.nn.softmax(tf.cast(logits, tf.float64))
        probs = tf.cast(softmax64, tf.float32)

        # Final row of the probability matrix, then flattened to a vector.
        n_rows = tf.shape(probs)[0]
        last_row = tf.slice(probs, [n_rows - 1, 0], [1, n_channels])
        last_flat = tf.reshape(last_row, [-1])

        with self.test_session() as sess:
            sess.run(tf.initialize_all_variables())
            print(sess.run(logits).shape)
            print(sess.run(probs)[1])
            print(sess.run(probs)[0])
            print(sess.run(last_row).shape)
            print(sess.run(last_flat).shape)
Пример #2
0
def load_audio_not_one_hot(
        filename,
        sample_rate=get_model_params('SAMPLE_RATE'),
        quantization_channels=get_model_params('QUANTIZATION_CHANNELS'),
        batch_size=get_model_params('BATCH_SIZE')):
    """Load a WAV file and return its mu-law quantized samples.

    Args:
        filename: passed to ``load_wav``.
        sample_rate: passed to ``load_wav``; the default is read from the
            model parameters once, when this function is defined.
        quantization_channels: passed to ``mu_law_encode``.
        batch_size: accepted but not used by this function.

    Returns:
        The result of ``mu_law_encode`` on the loaded audio; no one-hot
        encoding is applied.
    """
    return mu_law_encode(load_wav(filename, sample_rate),
                         quantization_channels)
Пример #3
0
def create_seed(filename, sample_rate, quantization_channels, window_size):
    """Quantize an audio file and keep at most ``window_size`` samples.

    The file is loaded as mono at ``sample_rate``, mu-law encoded with
    ``quantization_channels`` levels, and truncated to the first
    ``window_size`` entries (or left whole when it is shorter).
    """
    samples, _ = librosa.load(filename, sr=sample_rate, mono=True)
    encoded = mu_law_encode(samples, quantization_channels)

    def _full_length():
        return tf.size(encoded)

    def _window_length():
        return tf.constant(window_size)

    # End index is the smaller of the clip length and the window size.
    shorter_than_window = tf.size(encoded) < tf.constant(window_size)
    end = tf.cond(shorter_than_window, _full_length, _window_length)

    return encoded[:end]
Пример #4
0
    def testEncodeDecode(self):
        """Round-trip a linear ramp through mu-law encode/decode twice."""
        n_levels = 256
        ramp = np.linspace(-1, 1, 1000).astype(np.float32)

        # First pass: the decoded signal should be roughly what went in.
        with self.test_session() as sess:
            quantized = mu_law_encode(ramp, n_levels)
            first_pass = sess.run(mu_law_decode(quantized, n_levels))

        self.assertAllClose(ramp, first_pass, rtol=1e-1, atol=0.05)

        # Second pass: re-encoding an already quantized waveform must leave
        # it unchanged.
        with self.test_session() as sess:
            quantized = mu_law_encode(first_pass, n_levels)
            second_pass = sess.run(mu_law_decode(quantized, n_levels))

        self.assertAllClose(first_pass, second_pass)
Пример #5
0
    def testEncodeDecode(self):
        """Check that mu-law decode(encode(x)) is close to x and stable."""
        channels = 256

        def round_trip(signal):
            # Encode and decode once inside a fresh test session.
            with self.test_session() as sess:
                codes = mu_law_encode(signal, channels)
                return sess.run(mu_law_decode(codes, channels))

        original = np.linspace(-1, 1, 1000).astype(np.float32)

        # The decoded signal is only expected to be roughly equal to the
        # input, hence the loose tolerances.
        once = round_trip(original)
        self.assertAllClose(original, once, rtol=1e-1, atol=0.05)

        # A second round trip must leave the waveform invariant.
        twice = round_trip(once)
        self.assertAllClose(once, twice)
Пример #6
0
    def testEndToEndTraining(self):
        """Train the network on sine waves and check that the loss drops.

        The test asserts three things: the loss before training is above
        ``max_allowed_loss``, the loss after ``TRAIN_ITERATIONS`` steps is
        below it, and the final loss is less than 1% of the initial loss.
        When ``self.generate`` is set, waveforms are also generated with and
        without incremental generation and passed to ``check_waveform``.
        """
        audio = make_sine_waves()
        np.random.seed(42)

        # if self.generate:
        #    librosa.output.write_wav('/tmp/sine_train.wav', audio,
        #                             SAMPLE_RATE_HZ)
        #    power_spectrum = np.abs(np.fft.fft(audio))**2
        #    freqs = np.fft.fftfreq(audio.size, SAMPLE_PERIOD_SECS)
        #    indices = np.argsort(freqs)
        #    indices = [index for index in indices if freqs[index] >= 0 and
        #                                             freqs[index] <= 500.0]
        #    plt.plot(freqs[indices], power_spectrum[indices])
        #    plt.show()

        # Build the graph: mu-law encode the audio, compute the network loss
        # and create the training op for the configured optimizer type.
        audio_tensor = tf.convert_to_tensor(audio, dtype=tf.float32)
        encode_output = mu_law_encode(audio_tensor, QUANTIZATION_CHANNELS)
        loss = self.net.loss(encode_output)
        optimizer = optimizer_factory[self.optimizer_type](
                      learning_rate=self.learning_rate, momentum=self.momentum)
        trainable = tf.trainable_variables()
        optim = optimizer.minimize(loss, var_list=trainable)
        init = tf.initialize_all_variables()

        generated_waveform = None
        max_allowed_loss = 0.1
        # Start at the threshold so the assertLess below fails if the
        # training loop never assigns a value.
        loss_val = max_allowed_loss
        initial_loss = None
        with self.test_session() as sess:
            sess.run(init)
            # Loss before any optimization step has been taken.
            initial_loss = sess.run(loss)
            for i in range(TRAIN_ITERATIONS):
                loss_val, _ = sess.run([loss, optim])
                # if i % 10 == 0:
                #     print("i: %d loss: %f" % (i, loss_val))

            # Sanity check the initial loss was larger.
            self.assertGreater(initial_loss, max_allowed_loss)

            # Loss after training should be small.
            self.assertLess(loss_val, max_allowed_loss)

            # Loss should be at least two orders of magnitude better
            # than before training.
            self.assertLess(loss_val / initial_loss, 0.01)

            # saver = tf.train.Saver(var_list=tf.trainable_variables())
            # saver.save(sess, '/tmp/sine_test_model.ckpt', global_step=i)
            if self.generate:
                # Check non-incremental generation
                generated_waveform = generate_waveform(sess, self.net, False)
                check_waveform(self.assertGreater, generated_waveform)

                # Check incremental generation
                generated_waveform = generate_waveform(sess, self.net, True)
                check_waveform(self.assertGreater, generated_waveform)
Пример #7
0
    def testEncodeNegativeChannelSize(self):
        """Expect a TypeError when encoding with a negative channel count.

        The encode call is made inside the ``assertRaises`` context manager
        so that the exception is raised while the assertion is watching.
        """
        np.random.seed(1944)  # For repeatability of test.

        channels = -256
        number_of_samples = 1024
        x = np.zeros(number_of_samples).astype(np.float32)

        with self.test_session() as sess:
            # NOTE: do not write assertRaises(TypeError, sess.run(...)).
            # That form evaluates sess.run first and hands its *result* to
            # assertRaises as the callable; calling a non-callable result
            # raises TypeError by itself, so the check would pass regardless
            # of how the encoder behaves.
            with self.assertRaises(TypeError):
                sess.run(mu_law_encode(x, channels))
Пример #8
0
    def testEncodeNegativeChannelSize(self):
        """Encoding with a negative channel count must raise TypeError.

        Uses the context-manager form of ``assertRaises`` so the encoder is
        actually executed under the assertion.
        """
        np.random.seed(1944)  # For repeatability of test.

        channels = -256
        number_of_samples = 1024
        x = np.zeros(number_of_samples).astype(np.float32)

        with self.test_session() as sess:
            # Passing ``sess.run(...)`` as the second positional argument of
            # assertRaises would run the encoder *before* the assertion and
            # then try to call its return value; that call fails with
            # TypeError on its own, which makes the test pass vacuously.
            with self.assertRaises(TypeError):
                sess.run(mu_law_encode(x, channels))
Пример #9
0
    def testEncodeUniformRandomNoise(self):
        """mu_law_encode must agree with manual_mu_law_encode on noise."""
        np.random.seed(42)  # Fixed seed keeps the noise reproducible.

        n_levels = 256
        n_samples = 2048
        noise = np.random.uniform(-1, 1, n_samples).astype(np.float32)
        expected = manual_mu_law_encode(noise, n_levels)

        with self.test_session() as sess:
            actual = sess.run(mu_law_encode(noise, n_levels))

        self.assertAllEqual(expected, actual)
Пример #10
0
    def testEncodeZeros(self):
        """mu_law_encode must agree with manual_mu_law_encode on silence."""
        np.random.seed(1944)  # For repeatability of test.

        n_levels = 256
        n_samples = 1024
        silence = np.zeros(n_samples).astype(np.float32)
        expected = manual_mu_law_encode(silence, n_levels)

        with self.test_session() as sess:
            actual = sess.run(mu_law_encode(silence, n_levels))

        self.assertAllEqual(expected, actual)
Пример #11
0
    def testEncodePrecomputed(self):
        """Compare mu_law_encode output with precomputed 256-level codes."""
        channels = 256
        # Input samples and the code expected for each of them, position by
        # position.
        x = np.array([-1.0, 1.0, 0.6, -0.25, 0.01,
                      0.33, -0.9999, 0.42, 0.1, -0.45], dtype=np.float32)
        encoded_manual = np.array([0, 255, 243, 32, 157,
                                   230, 0, 235, 203, 18], dtype=np.int32)

        with self.test_session() as sess:
            encoded = sess.run(mu_law_encode(x, channels))

        self.assertAllEqual(encoded_manual, encoded)
Пример #12
0
    def testEncodeUniformRandomNoise(self):
        """Encode uniform noise and compare with manual_mu_law_encode."""
        np.random.seed(42)  # For repeatability of test.

        quant_channels = 256
        sample_count = 2048
        samples = np.random.uniform(-1, 1, sample_count).astype(np.float32)
        reference = manual_mu_law_encode(samples, quant_channels)

        with self.test_session() as sess:
            encoded_op = mu_law_encode(samples, quant_channels)
            result = sess.run(encoded_op)

        self.assertAllEqual(reference, result)
Пример #13
0
    def testEncodeZeros(self):
        """Encode an all-zero signal and compare with manual_mu_law_encode."""
        np.random.seed(1944)  # For repeatability of test.

        quant_channels = 256
        sample_count = 1024
        zeros = np.zeros(sample_count).astype(np.float32)
        reference = manual_mu_law_encode(zeros, quant_channels)

        with self.test_session() as sess:
            encoded_op = mu_law_encode(zeros, quant_channels)
            result = sess.run(encoded_op)

        self.assertAllEqual(reference, result)
Пример #14
0
    def testEncodeRamp(self):
        """mu_law_encode must agree with manual_mu_law_encode on a ramp."""
        np.random.seed(1944)  # For repeatability of test.

        n_levels = 256
        n_samples = 1024
        # Spacing between consecutive ramp values covering [-1.0, 1.0).
        step = 2.0 / n_samples
        ramp = np.arange(-1.0, 1.0, step).astype(np.float32)
        expected = manual_mu_law_encode(ramp, n_levels)

        with self.test_session() as sess:
            actual = sess.run(mu_law_encode(ramp, n_levels))

        self.assertAllEqual(expected, actual)
Пример #15
0
def create_seed(filename,
                sample_rate,
                quantization_channels,
                window_size=WINDOW):
    """Build a generation seed from the start of an audio file.

    The file is loaded as mono at ``sample_rate``, passed through
    ``audio_reader.trim_silence``, mu-law encoded, and cut to at most
    ``window_size`` samples.
    """
    samples, _ = librosa.load(filename, sr=sample_rate, mono=True)
    samples = audio_reader.trim_silence(samples)
    encoded = mu_law_encode(samples, quantization_channels)

    def _clip_length():
        return tf.size(encoded)

    def _window_length():
        return tf.constant(window_size)

    # Use the whole clip when it is shorter than the window.
    fits_in_window = tf.size(encoded) < tf.constant(window_size)
    end = tf.cond(fits_in_window, _clip_length, _window_length)

    return encoded[:end]
Пример #16
0
def create_seed(waveform,
                sample_rate,
                quantization_channels,
                window_size,
                silence_threshold=SILENCE_THRESHOLD):
    """Mu-law encode a waveform and keep at most ``window_size`` samples.

    ``sample_rate`` and ``silence_threshold`` are accepted but not used by
    this implementation; the waveform is encoded as given.
    """
    encoded = mu_law_encode(waveform, quantization_channels)

    def _encoded_length():
        # Branch taken when the encoded signal is shorter than the window.
        return tf.size(encoded)

    def _window_length():
        # Branch taken otherwise: truncate to the window size.
        return tf.constant(window_size)

    is_shorter = tf.size(encoded) < tf.constant(window_size)
    end = tf.cond(is_shorter, _encoded_length, _window_length)

    return encoded[:end]
Пример #17
0
    def testEncodePrecomputed(self):
        """mu_law_encode must reproduce a table of precomputed codes."""
        channels = 256
        # Each input sample is paired by position with its expected code.
        x = np.array(
            [-1.0, 1.0, 0.6, -0.25, 0.01, 0.33, -0.9999, 0.42, 0.1, -0.45],
            dtype=np.float32)
        encoded_manual = np.array(
            [0, 255, 243, 32, 157, 230, 0, 235, 203, 18], dtype=np.int32)

        with self.test_session() as sess:
            encoded = sess.run(mu_law_encode(x, channels))

        self.assertAllEqual(encoded_manual, encoded)
Пример #18
0
def create_seed(filename,
                sample_rate,
                quantization_channels,
                window_size,
                silence_threshold=SILENCE_THRESHOLD):
    """Load an audio file and return its first quantized samples.

    The file is loaded as mono at ``sample_rate`` and mu-law encoded; the
    result is truncated to ``window_size`` entries unless it is already
    shorter. ``silence_threshold`` is accepted but not used here (no
    silence trimming is performed).
    """
    samples, _ = librosa.load(filename, sr=sample_rate, mono=True)
    encoded = mu_law_encode(samples, quantization_channels)

    def _whole_clip():
        return tf.size(encoded)

    def _one_window():
        return tf.constant(window_size)

    too_short = tf.size(encoded) < tf.constant(window_size)
    end = tf.cond(too_short, _whole_clip, _one_window)

    return encoded[:end]
Пример #19
0
    def testEncodeRamp(self):
        """Encode a linear ramp and compare with manual_mu_law_encode."""
        np.random.seed(1944)  # For repeatability of test.

        quant_channels = 256
        sample_count = 1024
        # Increment between neighbouring samples of the ramp over [-1, 1).
        increment = 2.0 / sample_count
        samples = np.arange(-1.0, 1.0, increment).astype(np.float32)
        reference = manual_mu_law_encode(samples, quant_channels)

        with self.test_session() as sess:
            encoded_op = mu_law_encode(samples, quant_channels)
            result = sess.run(encoded_op)

        self.assertAllEqual(reference, result)
Пример #20
0
    def testDecodeEncode(self):
        """Decoding then re-encoding every quantization level is lossless."""
        # Generate every possible quantized level. The builtin ``int`` is
        # used as dtype: ``np.int`` was only an alias for it and has been
        # removed from recent NumPy releases.
        x = np.arange(QUANT_LEVELS, dtype=int)

        # Decode then encode every value.
        with self.test_session() as sess:
            # Decode into floating-point scalar.
            decoded = mu_law_decode(x, QUANT_LEVELS)
            # Encode back into an integer quantization level.
            encoded = mu_law_encode(decoded, QUANT_LEVELS)
            round_tripped = sess.run(encoded)

        # Decoding then encoding every level should produce what we started
        # with.
        self.assertAllEqual(x, round_tripped)
Пример #21
0
    def testDecodeEncode(self):
        """Every quantization level must survive a decode/encode round trip."""
        # All levels 0 .. QUANT_LEVELS - 1. ``dtype=int`` replaces the
        # ``np.int`` alias, which newer NumPy versions no longer provide.
        x = np.arange(QUANT_LEVELS, dtype=int)

        with self.test_session() as sess:
            # Integer level -> floating-point value.
            decoded = mu_law_decode(x, QUANT_LEVELS)
            # Floating-point value -> integer level again.
            encoded = mu_law_encode(decoded, QUANT_LEVELS)
            round_tripped = sess.run(encoded)

        # The round trip should hand back exactly the levels we started with.
        self.assertAllEqual(x, round_tripped)
Пример #22
0
    def testEncodeDecodeShift(self):
        """An encode/decode round trip must not scale or offset the signal."""
        tolerance = 1e-4
        ramp = np.linspace(-1, 1, 1000).astype(np.float32)

        with self.test_session() as sess:
            quantized = mu_law_encode(ramp, QUANT_LEVELS)
            recovered = sess.run(mu_law_decode(quantized, QUANT_LEVELS))

        # Fit a straight line to recovered-vs-input values: non-unity scaling
        # shows up as slope != 1, a shift as y-intercept != 0.
        slope, y_intercept = np.polyfit(ramp, recovered, 1)
        self.assertNear(slope, 1.0, tolerance)
        self.assertNear(y_intercept, 0.0, tolerance)
Пример #23
0
def create_seed(filename, sample_rate, quantization_channels, window_size,
                scalar_input):
    """Return at most the first ``window_size`` samples of a seed clip.

    The file is loaded as mono at ``sample_rate`` and passed through
    ``audio.trim_silence``. With ``scalar_input`` the raw samples are
    returned; otherwise they are mu-law encoded first.
    """
    # Only the leading part of the seed is used.
    clip, _ = librosa.load(filename, sr=sample_rate, mono=True)
    clip = audio.trim_silence(clip, default_hparams)

    if scalar_input:
        # Raw samples: hand the clip back untouched when it is already short.
        return clip if len(clip) < window_size else clip[:window_size]

    encoded = mu_law_encode(clip, quantization_channels)

    def _clip_length():
        return tf.size(encoded)

    def _window_length():
        return tf.constant(window_size)

    # A short clip is returned as short as it is; shouldn't it at least be
    # padded???
    is_short = tf.size(encoded) < tf.constant(window_size)
    end = tf.cond(is_short, _clip_length, _window_length)

    return encoded[:end]
Пример #24
0
    def testEncodeDecodeShift(self):
        """Detect scaling or shift introduced by mu-law encode + decode."""
        samples = np.linspace(-1, 1, 1000).astype(np.float32)

        with self.test_session() as sess:
            decoded_op = mu_law_decode(
                mu_law_encode(samples, QUANT_LEVELS), QUANT_LEVELS)
            decoded_values = sess.run(decoded_op)

        # A first-degree fit of decoded vs. original values should give the
        # identity line: slope 1 (no scaling) and intercept 0 (no shift).
        line = np.polyfit(samples, decoded_values, 1)
        max_error = 1e-4
        for fitted, ideal in ((line[0], 1.0), (line[1], 0.0)):
            self.assertNear(fitted, ideal, max_error)
Пример #25
0
def create_seed(filename,
                sample_rate,
                quantization_channels,
                window_size,
                scalar_input,
                silence_threshold=SILENCE_THRESHOLD):
    """Load a seed clip and limit it to ``window_size`` samples.

    The file is loaded as mono at ``sample_rate`` and trimmed with
    ``audio_reader.trim_silence`` using ``silence_threshold``. When
    ``scalar_input`` is truthy the raw samples are returned, otherwise the
    mu-law encoded samples are.
    """
    samples, _ = librosa.load(filename, sr=sample_rate, mono=True)
    samples = audio_reader.trim_silence(samples, silence_threshold)

    if scalar_input:
        # Raw samples: return the clip itself when it is shorter than the
        # window, otherwise only its first ``window_size`` entries.
        if len(samples) < window_size:
            return samples
        return samples[:window_size]

    encoded = mu_law_encode(samples, quantization_channels)

    def _encoded_length():
        return tf.size(encoded)

    def _window_length():
        return tf.constant(window_size)

    # A short clip is returned as short as it is; shouldn't it at least be
    # padded???
    is_short = tf.size(encoded) < tf.constant(window_size)
    end = tf.cond(is_short, _encoded_length, _window_length)

    return encoded[:end]
Пример #26
0
    x for x in os.listdir(DIRS['SONGS']) if x.endswith('.wav')
][0])
# Name for a second WAV file derived from the input name ('_after' suffix).
wav_fname_new = wav_fname.replace('.wav', '_after.wav')

# Load raw audio (mono, at the configured sample rate) and show a short
# excerpt of the samples.

# In[14]:

audio, _ = librosa.load(wav_fname, sr=M_PARAMS['SAMPLE_RATE'], mono=True)
audio[1000:1050]

# Mu-law encode the audio (described by the author as 8-bit amplitude; the
# actual number of levels comes from M_PARAMS) and evaluate the same excerpt.

# In[15]:

quantized = mu_law_encode(audio, M_PARAMS['QUANTISATION_CHANNELS'])
quantized[1000:1050].eval(session=sess)

# Get the RNN input: one-hot encode the quantized signal and evaluate a
# slice of the first entry.

# In[16]:

quantized_oh = _one_hot(quantized)
quantized_oh[0][1000:1020].eval(session=sess)

# let RNN out be exact RNN input (for test)
#
# turn it back to 8 bit signal

# In[17]:
Пример #27
0
 def testEncodeIsSurjective(self):
     """Every one of the 123 quantization levels must be produced."""
     n_levels = 123
     ramp = np.linspace(-1, 1, 10000).astype(np.float32)
     with self.test_session() as sess:
         codes = sess.run(mu_law_encode(ramp, n_levels))
     # Count the distinct codes that the dense ramp maps onto.
     distinct_codes = np.unique(codes)
     self.assertEqual(len(distinct_codes), n_levels)
Пример #28
0
 def testEncodeIsSurjective(self):
     """A dense ramp over [-1, 1] must hit all quantization levels."""
     quant_channels = 123
     samples = np.linspace(-1, 1, 10000).astype(np.float32)
     with self.test_session() as sess:
         encoded_op = mu_law_encode(samples, quant_channels)
         result = sess.run(encoded_op)
     # The number of unique codes has to equal the number of channels.
     self.assertEqual(len(np.unique(result)), quant_channels)