def sample_clips(self, epoch):
    """Generate a batch of clips from random latent vectors and save them.

    Draws 25 latent vectors (a 5 x 5 batch) from a normal distribution with
    mean 0 and standard deviation 0.01, runs them through
    ``self.generator`` and hands the result to ``save_sound`` under the
    "cat" label with ``upscale=True``.

    Args:
        epoch: Forwarded unchanged to ``save_sound``.
    """
    rows, cols = 5, 5
    latent_shape = (rows * cols, self.latent_dim)
    latent_noise = np.random.normal(loc=0, scale=0.01, size=latent_shape)
    generated = self.generator.predict(latent_noise)
    save_sound(generated, self.folder_name, "cat", epoch, upscale=True)
def sample_clips(self, epoch):
    """Generate 25 clips from low-variance latent noise and save them.

    The latent batch has shape ``(25, self.latent_dim)`` and is drawn from a
    normal distribution with mean 0 and standard deviation 0.01. The
    generator output is passed to ``save_sound`` with the fixed folder
    "wgan" and label "clap".

    Args:
        epoch: Forwarded unchanged to ``save_sound``.
    """
    rows, cols = 5, 5
    batch = rows * cols
    latent_noise = np.random.normal(
        loc=0, scale=0.01, size=(batch, self.latent_dim)
    )
    generated = self.generator.predict(latent_noise)
    save_sound(generated, "wgan", "clap", epoch)

    # Progress marker printed after the clips have been handed to save_sound.
    print("Play a sound")
def train(self, epochs, batch_size=128, sample_interval=50):
    """Run the adversarial training loop for ``epochs`` iterations.

    Before training, the data in ``self.X_TRAIN`` is divided by 65536 and
    shifted by 0.5, saved once via ``save_sound`` as "reference", and an
    initial sample is written with ``self.sample_clips(-1)``.

    Each iteration trains the discriminator on one real and one generated
    batch, then trains the generator through ``self.combined`` with the
    "real" labels, and prints the losses.

    Args:
        epochs: Number of training iterations.
        batch_size: Number of clips per batch.
        sample_interval: Generated samples are saved whenever
            ``epoch % sample_interval == 0``.
    """
    # Discriminator targets: 1 for real clips, 0 for generated ones.
    real_labels = np.ones((batch_size, 1))
    fake_labels = np.zeros((batch_size, 1))

    # Rescale the raw training data (presumably 16-bit samples; confirm).
    X_train = self.X_TRAIN / 65536 + 0.5

    save_sound(X_train, self.folder_name, "reference")
    self.sample_clips(-1)

    latent_shape = (batch_size, self.latent_dim)

    for epoch in range(epochs):
        # ---- Discriminator step ----
        # Random batch of real clips (sampled with replacement).
        batch_idx = np.random.randint(0, X_train.shape[0], batch_size)
        real_clips = X_train[batch_idx]

        # Matching batch of generated clips.
        disc_noise = np.random.normal(0, 1, latent_shape)
        fake_clips = self.generator.predict(disc_noise)

        loss_on_real = self.discriminator.train_on_batch(real_clips, real_labels)
        loss_on_fake = self.discriminator.train_on_batch(fake_clips, fake_labels)
        d_loss = 0.5 * np.add(loss_on_real, loss_on_fake)

        # ---- Generator step ----
        # Fresh noise; the generator is rewarded when the discriminator
        # labels its output as real.
        gen_noise = np.random.normal(0, 1, latent_shape)
        g_loss = self.combined.train_on_batch(gen_noise, real_labels)

        # Progress report.
        report = (epoch, d_loss[0], 100 * d_loss[1], g_loss)
        print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % report)

        # Periodically write generated samples.
        if epoch % sample_interval == 0:
            self.sample_clips(epoch)
def train(self, epochs, batch_size, sample_interval=50):
    """Train the critic and the generator for ``epochs`` iterations.

    The training data in ``self.X_TRAIN`` is divided by 65536 and shifted
    by 0.5, then saved once through ``save_sound`` as "reference". Each
    epoch runs ``self.n_critic`` critic updates followed by a generator
    update, prints both losses, and calls ``self.sample_clips`` whenever
    ``epoch % sample_interval == 0``.

    Args:
        epochs: Number of outer training iterations.
        batch_size: Number of clips per batch.
        sample_interval: Interval, in epochs, between saved samples.
    """
    # Load the dataset
    #X_train = self.X_TRAIN
    X_train = self.X_TRAIN / 65536
    X_train = X_train + 0.5
    save_sound(X_train, self.folder_name, "reference")

    # Adversarial ground truths (-1 for real, +1 for generated)
    valid = -np.ones((batch_size, 1))
    fake = np.ones((batch_size, 1))
    dummy = np.zeros((batch_size, 1))  # Dummy gt for gradient penalty

    for epoch in range(epochs):

        for _ in range(self.n_critic):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of clips
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            imgs = X_train[idx]
            # Sample generator input
            #noise = np.zeros((self.latent_dim,1))
            noise = np.random.normal(0, 0.01, (batch_size, self.latent_dim))
            # Train the critic
            d_loss = self.critic_model.train_on_batch([imgs, noise], [valid, fake, dummy])

        # ---------------------
        #  Train Generator
        # ---------------------

        # NOTE(review): the generator step is assumed to run once per epoch,
        # after the critic loop, reusing the noise from the last critic
        # update -- confirm against the original file layout.
        g_loss = self.generator_model.train_on_batch(noise, valid)

        # Print the progress
        print("%d [D loss: %f] [G loss: %f]" % (epoch, d_loss[0], g_loss))

        # If at save interval => save generated clip samples
        if epoch % sample_interval == 0:
            self.sample_clips(epoch)
self._model.add(Lambda(lambda x: x[:,0])) self._model.summary() super(Conv1DTranspose, self).build(input_shape) def call(self, x): return self._model(x) def compute_output_shape(self, input_shape): return self._model.compute_output_shape(input_shape) os.environ["CUDA_VISIBLE_DEVICES"]="0" latent_dim = 10 save_folder = "bassnsynthupsample" sound = load_all("nsynth", "bass_synthetic",forceLoad=True) save_sound(sound, save_folder, "original", upscale=False) #print("hows the sound") #print(len(sound[0])) sound = sound / 65536 #print(len(sound[0])) sound = sound + 0.5 #print(len(sound[0])) target = np.array(sound[0]) target = target.reshape(1, target.shape[0], target.shape[1]) #print(target.shape) input_shape = (1,target.shape[0]) #print(input_shape) samples = len(sound[0]) #print(samples)
# Autoencoder model: maps an input clip to its reconstruction.
autoencoder = Model(input_clip, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
autoencoder.summary()

# Convert to float32, divide by 65536 and shift by 0.5 before training.
x_train = x_train.astype('float32') / 65536 + 0.5
x_test = x_test.astype('float32') / 65536 + 0.5
print(x_train.shape)

# The network is trained to reproduce its own input.
autoencoder.fit(
    x_train,
    x_train,
    epochs=50,
    batch_size=32,
    shuffle=True,
    validation_data=(x_test, x_test),
)

from playsound import save_sound

decoded_imgs = autoencoder.predict(x_test)

n = 10  # how many clips to save
for i in range(n):
    # Save each test clip next to its reconstruction under the same index.
    save_sound(
        x_test, folder_name, "original",
        upscale=True, index=i, epoch=i,
    )
    save_sound(
        decoded_imgs, folder_name, "decoded",
        upscale=True, index=i, epoch=i,
    )
def sample_clips(self, epoch):
    """Generate 25 clips from standard-normal latent noise and save them.

    A latent batch of shape ``(25, self.latent_dim)`` is drawn from a normal
    distribution with mean 0 and standard deviation 1, passed through
    ``self.generator``, and saved via ``save_sound`` with the "generated"
    label.

    Args:
        epoch: Forwarded unchanged to ``save_sound``.
    """
    rows, cols = 5, 5
    n_clips = rows * cols
    latent_noise = np.random.normal(
        loc=0, scale=1, size=(n_clips, self.latent_dim)
    )
    generated = self.generator.predict(latent_noise)
    save_sound(generated, self.folder_name, "generated", epoch)
import os os.environ["CUDA_VISIBLE_DEVICES"] = "0" num_classes = 10 (x_train, y_train), (x_test, y_test) = load_audio("nsynth", num_classes, forceLoad=False, framerate=16384) batch_size = 30 epochs = 50 kernel_size = 5 save_sound(x_train, "classification", "xtrain", upscale=False) save_sound(x_test, "classification", "xtest", upscale=False) input_shape = (x_train.shape[1], 1) convolution_layers = count_convolutions(input_shape, kernel_size) model = keras.models.Sequential() model.add( Conv1D(16, kernel_size=kernel_size, activation='selu', strides=2, input_shape=input_shape, padding="same")) for i in range(convolution_layers): model.add(