示例#1
0
    def __init__(self):
        """Load the training clips and assemble the GAN's Keras models.

        Builds and compiles the discriminator, builds the generator, and
        compiles a stacked generator -> discriminator model in which the
        discriminator is marked non-trainable, so that training the stacked
        model is meant to update the generator only.
        """
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"

        # Training data; axis 1 of the loaded array gives the sample count.
        clips = load_all("nsynth", "organ_electronic", forceLoad=True)
        self.X_TRAIN = clips
        self.samples = clips.shape[1]
        self.channels = 1
        self.audio_shape = (self.samples, self.channels)
        self.kernel_size = 5
        self.latent_dim = 100
        self.folder_name = "simplegannsynth"

        # A single optimizer instance is shared by both compiled models.
        adam = Adam(0.0002, 0.5)

        # Discriminator: compiled on its own before it gets frozen below.
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(
            loss='binary_crossentropy',
            optimizer=adam,
            metrics=['accuracy'],
        )

        # Generator: never compiled directly, only as part of the stack.
        self.generator = self.build_generator()

        # Freeze the discriminator before wiring and compiling the stack.
        self.discriminator.trainable = False

        # noise -> generator -> discriminator -> validity score
        noise_in = Input(shape=(self.latent_dim, ))
        verdict = self.discriminator(self.generator(noise_in))

        # Stacked model used to train the generator to fool the discriminator.
        self.combined = Model(noise_in, verdict)
        self.combined.compile(loss='binary_crossentropy', optimizer=adam)
    def __init__(self):
        """Load the training clips and build the WGAN training graphs.

        Two compiled Keras models are created:

        * ``self.critic_model`` takes ``[real_clip, noise]`` and outputs the
          critic's score for the real clip, for the generated clip, and for
          a ``RandomWeightedAverage`` of the two. The first two outputs use
          ``self.wasserstein_loss``; the third uses
          ``self.gradient_penalty_loss`` with a loss weight of 10. The
          generator is marked non-trainable while this model is compiled.
        * ``self.generator_model`` takes noise and outputs the critic's
          score for the generated clip. The critic is marked non-trainable
          while this model is compiled.
        """
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        x_train = load_all("categorized",
                           "cat",
                           forceLoad=True,
                           framerate=32768)
        self.X_TRAIN = x_train
        self.samples = x_train.shape[1]
        self.channels = 1
        self.kernel_size = 5
        self.audio_shape = (self.samples, self.channels)
        self.latent_dim = 100
        self.folder_name = "wganbatchnorm"

        # Following parameter and optimizer set as recommended in paper
        self.n_critic = 5
        optimizer = RMSprop(lr=0.00005)

        # Build the generator and critic
        self.generator = self.build_generator()
        self.critic = self.build_critic()

        #-------------------------------
        # Construct Computational Graph
        #       for the Critic
        #-------------------------------

        # Freeze generator's layers while training critic
        self.generator.trainable = False

        # Audio input (real sample)
        real_clip = Input(shape=self.audio_shape)

        # Noise input; sized from latent_dim so the two cannot drift apart
        z_disc = Input(shape=(self.latent_dim, ))
        # Generate a clip from the noise (fake sample)
        fake_clip = self.generator(z_disc)

        # Critic scores the real and fake clips
        fake = self.critic(fake_clip)
        valid = self.critic(real_clip)

        # Construct weighted average between real and fake clips
        interpolated_clip = RandomWeightedAverage()([real_clip, fake_clip])
        # Critic score of the weighted sample
        validity_interpolated = self.critic(interpolated_clip)

        # Use Python partial to provide loss function with additional
        # 'averaged_samples' argument
        partial_gp_loss = partial(self.gradient_penalty_loss,
                                  averaged_samples=interpolated_clip)
        partial_gp_loss.__name__ = 'gradient_penalty'  # Keras requires function names

        self.critic_model = Model(inputs=[real_clip, z_disc],
                                  outputs=[valid, fake, validity_interpolated])
        self.critic_model.compile(
            loss=[
                self.wasserstein_loss,
                self.wasserstein_loss,
                partial_gp_loss,
            ],
            optimizer=optimizer,
            loss_weights=[1, 1, 10])

        #-------------------------------
        # Construct Computational Graph
        #         for Generator
        #-------------------------------

        # For the generator we freeze the critic's layers
        self.critic.trainable = False
        self.generator.trainable = True

        # Sampled noise for input to generator
        z_gen = Input(shape=(self.latent_dim, ))
        # Generate a clip from the noise
        generated_clip = self.generator(z_gen)
        # Critic scores the generated clip
        valid = self.critic(generated_clip)
        # Defines generator model
        self.generator_model = Model(z_gen, valid)
        self.generator_model.compile(loss=self.wasserstein_loss,
                                     optimizer=optimizer)
示例#3
0
import keras
from pydub import AudioSegment
from keras.models import Sequential, Model
from keras.layers.advanced_activations import LeakyReLU
from keras.layers import Input, Dense, UpSampling1D, Conv1D, Activation, Reshape, Flatten
import numpy as np
from playsound import play_sound, play_and_save_sound
from audio_loader import load_all

# End-to-end experiment script: load a set of clips, save/play them before
# and after rescaling, and start assembling a Sequential model.

# Not referenced in the visible part of this script.
latent_dim = 100

#sound = AudioSegment.from_wav("input/speech_commands/bird/0a7c2a8d_nohash_1.wav")
sound = load_all("categorized", "cat", forceLoad=True)

# Save/play the clips as loaded, without upscaling.
play_and_save_sound(sound, "endtoend2", "original", upscale=False)

# Rescale: divide by 65536, then shift by 0.5.
# NOTE(review): presumably maps signed 16-bit samples into [0, 1] — confirm
# against what load_all returns.
sound = sound / 65536
sound = sound + 0.5
# Only the first loaded clip is used as the target.
target = np.array(sound[0])
print(target.shape)

# Model input shape: one row whose width is the first axis of the target.
input_shape = (1, target.shape[0])
print(input_shape)

# Save/play the rescaled clips; upscale=True is passed this time.
play_and_save_sound(sound, "endtoend2", "normalized", upscale=True)

model = Sequential()

# First layer only applies a ReLU and fixes the model's input shape.
model.add(Activation("relu", input_shape=input_shape))
#model.add(Conv1D(32, kernel_size=5, activation='selu', strides=2,padding="same"))
#model.add(UpSampling1D())
                                        strides=self._strides,
                                        *self._args, **self._kwargs))
        self._model.add(Lambda(lambda x: x[:,0]))
        self._model.summary()
        super(Conv1DTranspose, self).build(input_shape)

    def call(self, x):
        """Run *x* through the wrapped internal model and return its output."""
        wrapped = self._model
        return wrapped(x)

    def compute_output_shape(self, input_shape):
        """Return the output shape the wrapped model reports for *input_shape*."""
        wrapped = self._model
        resulting_shape = wrapped.compute_output_shape(input_shape)
        return resulting_shape

# Experiment script: load a set of clips, save the originals, rescale them
# and prepare a single target clip plus the model input shape.
os.environ["CUDA_VISIBLE_DEVICES"]="0"
latent_dim = 10
# Passed to save_sound as its second argument.
save_folder = "bassnsynthupsample"
sound = load_all("nsynth", "bass_synthetic",forceLoad=True)

# Save the clips as loaded, without upscaling.
save_sound(sound, save_folder, "original", upscale=False)
#print("hows the sound")
#print(len(sound[0]))
# Rescale: divide by 65536, then shift by 0.5.
# NOTE(review): presumably maps signed 16-bit samples into [0, 1] — confirm
# against what load_all returns.
sound = sound / 65536
#print(len(sound[0]))
sound = sound + 0.5
#print(len(sound[0]))
# Only the first loaded clip is used as the target.
target = np.array(sound[0])
# Add a leading axis of size 1; this requires the clip to be 2-D.
target = target.reshape(1, target.shape[0], target.shape[1])
#print(target.shape)

# NOTE(review): after the reshape above target.shape[0] is 1, so this
# evaluates to (1, 1) — confirm that is the intended input shape.
input_shape = (1,target.shape[0])
#print(input_shape)