def generate(hparams):
    # Event vocabulary: four groups of 128 tokens each
    input_vocab_size = 128 + 128 + 128 + 128
    target_vocab_size = 128 + 128 + 128 + 128

    # Seed iterator over MIDI frames, skipping the first 16000 examples
    dataset = tf.data.Dataset.list_files(
        '/home/big/datasets/maestro-v2.0.0/**/*.midi')
    dataset_single = pro.pipeline([
        pro.midi(),
        pro.frame(hparams['frame_size'], hparams['frame_size'], True),
        pro.unbatch(),
    ])(dataset).skip(16000).as_numpy_iterator()

    transformer = Transformer(input_vocab_size=input_vocab_size,
                              target_vocab_size=target_vocab_size,
                              pe_input=input_vocab_size,
                              pe_target=target_vocab_size,
                              hparams=hparams)

    # Restore the latest checkpoint before sampling
    trainer = Trainer(dataset, hparams)
    ckpt = tf.train.Checkpoint(step=trainer.step,
                               transformer=transformer,
                               optimizer=transformer.optimizer)
    trainer.init_checkpoint(ckpt)

    return generate_from_model(hparams, transformer, dataset_single)
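
# Hedged usage sketch: generate() only reads 'frame_size' directly, but
# Transformer/Trainer pull further keys from the same dict; the value below
# is hypothetical, not the project's real configuration.
if __name__ == '__main__':
    demo_hparams = {'frame_size': 256}  # hypothetical
    generated = generate(demo_hparams)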
def generate(hparams, seed, pitches):
    gan_stats = np.load('gan_stats.npz')

    gan = GAN((256, 128), hparams)

    # Restore generator/discriminator weights from the latest checkpoint
    trainer = Trainer(None, hparams)
    ckpt = tf.train.Checkpoint(
        step=trainer.step,
        generator=gan.generator,
        discriminator=gan.discriminator,
        gen_optimizer=gan.generator_optimizer,
        disc_optimizer=gan.discriminator_optimizer,
    )
    trainer.init_checkpoint(ckpt)

    # seed = tf.repeat(seed, count, axis=0)
    pitches = tf.one_hot(pitches, hparams['cond_vector_size'], axis=1)
    samples = tf.reshape(gan.generator([seed, pitches], training=False),
                         [-1, 256, 128])

    audio = pro.pipeline([
        pro.denormalize(normalization='specgan', stats=gan_stats),
        pro.invert_log_melspec(hparams['sample_rate']),
        list,  # Workaround: invert_log_melspec only handles one
               # np.array spectrogram at a time
    ])(samples)

    return samples, audio
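
# Hedged usage sketch. Assumptions: hparams carries the 'latent_size',
# 'cond_vector_size' and 'sample_rate' keys the code above reads, and the
# soundfile package is available for writing wavs; neither is confirmed by
# this file alone.
def _demo_generate(hparams):
    import soundfile as sf
    count = 4
    seed = tf.random.normal((count, hparams['latent_size']))
    pitches = tf.constant([40, 45, 50, 55])  # hypothetical pitch ids
    samples, audio = generate(hparams, seed, pitches)
    for i, clip in enumerate(audio):
        sf.write(f'gan_sample_{i}.wav', np.asarray(clip),
                 hparams['sample_rate'])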
def start(hparams):
    vae = VAE(hparams)

    # Restore the latest VAE checkpoint before sampling
    trainer = Trainer(None, hparams)
    ckpt = tf.train.Checkpoint(
        step=trainer.step,
        encoder=vae.encoder,
        decoder=vae.decoder,
        vae=vae.vae,
    )
    trainer.init_checkpoint(ckpt)

    # Draw 100 samples from the decoder and flatten them into one waveform.
    # Note: librosa.output.write_wav requires librosa < 0.8 (the
    # librosa.output module was removed in 0.8).
    samples = tf.reshape(vae.sample(100), [-1]).numpy()
    librosa.output.write_wav('vae_sample2.wav',
                             samples,
                             hparams['sample_rate'],
                             norm=False)
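
# For librosa >= 0.8, where librosa.output no longer exists, a hedged
# fallback is the soundfile package (an assumption: it is not listed as a
# dependency anywhere in this file):
def _write_wav_fallback(path, samples, sample_rate):
    import soundfile as sf
    sf.write(path, samples, sample_rate)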
def _eval(model):
    # dataset and hparams are captured from the enclosing scope
    trainer = Trainer(dataset, hparams)
    trainer.set_train_step(model.train_step)
    trainer.on_epoch_start = on_epoch_start
    trainer.on_step = on_step
    trainer.on_epoch_complete = on_epoch_complete
    stats = trainer.run()
    if stats is not None:
        loss, _, _ = stats
        # Fitness is the inverse of the mean loss, so lower loss scores higher
        return 1 / tf.reduce_mean(loss).numpy()
    else:
        return 0
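
# Hedged sketch of how _eval could drive the genetic pool that the
# commented-out transformer script references (pool.populate / evaluate /
# select). The Pool API and the idea that _eval is the fitness callback are
# assumptions, not verified against the pool implementation.
def _demo_evolve(pool, pop_size=10, generations=100, mutation_rate=0.2):
    pool.populate(pop_size, 1)
    for generation in range(generations):
        pool.evaluate(_eval)  # fitness = 1 / mean loss, so lower loss wins
        pool.select(pop_size)
        pool.populate(pop_size, mutation_rate)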
print(midi)

# Extract pitch and amplitude for every note event in the MIDI stream
pitches = tf.cast(
    [a.pitch - 24 for a in midi if isinstance(a, M.Midi.NoteEvent)], tf.int32)
amp = tf.cast(
    [a.velocity / 127 for a in midi if isinstance(a, M.Midi.NoteEvent)],
    tf.float32)
vel = tf.ones_like(pitches) * 2

sr = 16000
samples_per_note = 8000
tone_length = sr

# Restore the conditional GAN used for tone generation
gan_stats = np.load('gan_stats.npz')
gan = GAN((256, 128), gan_hparams)
gan_trainer = Trainer(None, gan_hparams)
gan_ckpt = tf.train.Checkpoint(
    step=gan_trainer.step,
    generator=gan.generator,
    discriminator=gan.discriminator,
    gen_optimizer=gan.generator_optimizer,
    disc_optimizer=gan.discriminator_optimizer,
)
gan_trainer.init_checkpoint(gan_ckpt)


def generate_tones(pitches):
    # One random latent vector per note, conditioned on its pitch
    seed = tf.random.normal((len(pitches), gan_hparams['latent_size']))
    pitches = tf.one_hot(pitches, gan_hparams['cond_vector_size'], axis=1)
    samples = gan.generator([seed, pitches], training=False)
    return samples
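
# Hedged sketch: lay the generated tones out on a timeline, one note every
# samples_per_note samples, each at most tone_length samples long. Assumes
# `tones` is a list of 1-D audio arrays obtained by inverting the generated
# spectrograms (as the GAN generation script does); that inversion step is
# not shown here.
def _demo_layout(tones):
    track = np.zeros(samples_per_note * len(tones) + tone_length)
    for i, tone in enumerate(tones):
        start = i * samples_per_note
        clip = np.asarray(tone)[:tone_length]
        track[start:start + len(clip)] += clip
    return track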
def start(hparams):
    # Load audio. The commented-out line loads the NSynth subset from TFDS;
    # the active code reads MAESTRO wav files from disk instead.
    # dataset = tfds.load('nsynth/gansynth_subset', split='train', shuffle_files=True)
    dataset = tf.data.Dataset.list_files(
        '/home/big/datasets/maestro-v2.0.0/**/*.wav')

    dataset = pro.pipeline([
        pro.wav(),
        # pro.resample(16000, hparams['sample_rate'], tf.float32),
        pro.normalize(),
        pro.frame(hparams['window_samples'], hparams['window_samples']),
        pro.unbatch(),
        pro.set_channels(1),
        pro.dupe(),
        pro.shuffle(hparams['buffer_size']),
        pro.batch(hparams['batch_size']),
        pro.prefetch()
    ])(dataset)

    vae = VAE(hparams)
    vae.vae.summary()

    trainer = Trainer(dataset, hparams)
    ckpt = tf.train.Checkpoint(
        step=trainer.step,
        encoder=vae.encoder,
        decoder=vae.decoder,
        vae=vae.vae,
    )
    trainer.on_epoch_start = on_epoch_start
    trainer.on_step = on_step
    trainer.init_tensorboard()
    trainer.init_checkpoint(ckpt)
    trainer.set_train_step(vae.train_step)
    trainer.run()
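
# Hedged illustration of what the pro.frame(window, window) stage is assumed
# to do to the raw waveform: cut it into non-overlapping windows, i.e. the
# equivalent of tf.signal.frame with frame_length == frame_step. Whether
# pro.frame matches this exactly is an assumption.
def _demo_framing(signal, window_samples):
    return tf.signal.frame(signal, window_samples, window_samples)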
def start(hparams):
    gc.collect()

    dataset = tf.data.Dataset.list_files(
        '/home/big/datasets/maestro-v2.0.0/**/*.midi')

    # Frames are twice the model's frame size so that each one can be split
    # into an (input, target) pair below
    dataset_single = pro.pipeline([
        pro.midi(),
        pro.frame(hparams['frame_size'] * 2, hparams['frame_hop_len'], True),
        pro.unbatch(),
    ])(dataset)

    def _reshape(inp, tar):
        inp = tf.reshape(inp, [hparams['frame_size']])
        tar = tf.reshape(tar, [hparams['frame_size']])
        return inp, tar

    dataset = pro.pipeline([
        pro.split(2),
        # pro.batch(2, True),
        # pro.dupe(),
        pro.map_transform(_reshape),
        pro.cache(),
        pro.shuffle(hparams['buffer_size']),
        pro.batch(hparams['batch_size'], True),
        pro.prefetch(),
    ])(dataset_single)

    # Separate shuffled iterator used to seed generation
    dataset_single = pro.shuffle(hparams['buffer_size'] // 4)(dataset_single)
    dataset_single = dataset_single.as_numpy_iterator()

    # input_vocab_size and target_vocab_size are module-level constants
    # (four groups of 128 event tokens, as in the generation script)
    transformer = Transformer(input_vocab_size=input_vocab_size,
                              target_vocab_size=target_vocab_size,
                              pe_input=input_vocab_size,
                              pe_target=target_vocab_size,
                              hparams=hparams)

    # pop_size = 10
    # generations = 100
    # mutation_rate = 0.2
    # pool.populate(pop_size, 1)
    # for generation in range(generations):
    #     pool.evaluate(evaluate(dataset, hparams))
    #     print(f"--- GENERATION: {generation} ---")
    #     print("BEST:", pool.best, pool.fitness)
    #     pool.select(pop_size)
    #     pool.populate(pop_size, mutation_rate)

    image_save_step = hparams.get('image_save_step', 2000)

    def generate_image(step, tsw):
        print("Generating sample...")
        encoded, seed = generate_from_model(hparams, transformer,
                                            dataset_single)
        print("Generating sample done.")

        print("Decoding midi...")
        decoded_seed = pro.decode_midi()(seed)
        decoded = pro.decode_midi()(encoded)
        print("Decoding midi done.")

        print("Saving midi...")
        with open(f'gen_transformer_{step}.midi', 'wb') as f:
            M.write_midi(f, decoded)
        with open(f'prior_transformer_{step}.midi', 'wb') as f:
            M.write_midi(f, decoded_seed)
        print("Saving midi done.")

        # Render the prior and the generated continuation side by side
        print("Plotting midi...")
        plt.title('Prior')
        M.display_midi(decoded_seed)
        image_seed = util.get_plot_image()
        plt.clf()
        plt.title('Generated')
        M.display_midi(decoded)
        image = util.get_plot_image()
        plt.clf()
        image_conc = tf.concat([image_seed, image], axis=1)
        print("Plotting done.")

        with tsw.as_default():
            tf.summary.image('image', image_conc, step=step)
        print("Complete.")

    # This runs at every step in the training (for each batch in dataset)
    def on_step(epoch, step, stats, tsw):
        loss, tar_real, predictions = stats
        train_loss(loss)
        train_accuracy(tar_real, predictions)
        if step % 100 == 0:
            print(
                f"Epoch: {epoch}, Step: {step}, Loss: {train_loss.result()}, Accuracy: {train_accuracy.result()}"
            )
        if step % image_save_step == 0:
            generate_image(step, tsw)
        with tsw.as_default():
            tf.summary.scalar('loss', train_loss.result(), step=step)

    trainer = Trainer(dataset, hparams)
    ckpt = tf.train.Checkpoint(step=trainer.step,
                               transformer=transformer,
                               optimizer=transformer.optimizer)
    trainer.init_checkpoint(ckpt)
    trainer.init_tensorboard()
    trainer.set_train_step(transformer.train_step)
    trainer.on_epoch_start = on_epoch_start
    trainer.on_step = on_step
    trainer.on_epoch_complete = on_epoch_complete
    # generate_image(trainer.step.numpy(), trainer.train_summary_writer)
    trainer.run()
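
# Hedged illustration of the input/target pairing the pipeline above is
# assumed to produce: pro.split(2) cuts each 2 * frame_size frame into an
# (inp, tar) pair of adjacent halves, so the transformer learns to continue
# a MIDI frame. Equivalent sketch with plain tf.split:
def _demo_split(frame, frame_size):
    inp, tar = tf.split(frame, 2)
    return tf.reshape(inp, [frame_size]), tf.reshape(tar, [frame_size])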
def start(hparams):
    # Load the NSynth dataset from TFDS
    dataset = tfds.load('nsynth/gansynth_subset',
                        split='train',
                        shuffle_files=True)

    gan_stats = calculate_dataset_stats(hparams, dataset)
    # gan_stats = np.load('gan_stats.npz')

    dataset = nsynth_to_melspec(dataset, hparams, gan_stats)

    # Determine the shape of the spectrograms in the dataset
    spec_shape = None
    for x in dataset.take(1):
        e = x['audio']
        cond = x['pitch']
        spec_shape = e.shape
        print(cond)
        print(f'Spectrogram shape: {spec_shape}')

    # Make sure we got a shape before continuing
    assert spec_shape is not None, "Could not get spectrogram shape"

    # Make sure the spectrogram dimensions are divisible by 4, because the
    # generator upscales its state twice by a factor of 2
    assert spec_shape[0] % 4 == 0 and spec_shape[1] % 4 == 0, \
        "Spectrogram dimensions are not divisible by 4"

    dataset = pro.index_map('audio', pro.reshape([*spec_shape, 1]))(dataset)

    # Create preprocessing pipeline for shuffling and batching
    dataset = pro.pipeline([
        pro.cache(),
        pro.shuffle(hparams['buffer_size']),
        pro.batch(hparams['batch_size']),
        pro.prefetch()
    ])(dataset)

    gan = GAN(spec_shape, hparams)
    gan.discriminator.summary()
    gan.generator.summary()

    # This runs at the end of every epoch and is used to display metrics
    def on_epoch_complete(epoch, step, duration, tsw):
        # display.clear_output(wait=True)
        # Sample a few pitches around the middle of the conditioning range
        count = 6
        seed = tf.random.normal((count, gan.hparams['latent_size']))
        mid = gan.hparams['cond_vector_size'] // 2
        pitches = tf.one_hot(range(mid - count // 2, mid + count // 2),
                             gan.hparams['cond_vector_size'],
                             axis=1)
        samples = tf.reshape(gan.generator([seed, pitches], training=False),
                             [-1, 128, 128])

        # Tile the spectrograms side by side and plot them
        img = tf.unstack(samples)
        img = tf.reverse(tf.concat(img, axis=1), axis=[0])
        plt.axis('off')
        plt.imshow(img)

        buf = io.BytesIO()
        plt.savefig(buf, format='png')
        buf.seek(0)
        # Convert PNG buffer to TF image
        image = tf.image.decode_png(buf.getvalue(), channels=4)
        # Add the batch dimension
        image = tf.expand_dims(image, 0)

        with tsw.as_default():
            tf.summary.image('Spectrogram', image, step=step)

        print(
            f"Epoch: {epoch}, Step: {step}, Gen Loss: {gen_loss_avg.result()}, Disc Loss: {disc_loss_avg.result()}, Duration: {duration} s"
        )

    trainer = Trainer(dataset, hparams)
    ckpt = tf.train.Checkpoint(
        step=trainer.step,
        generator=gan.generator,
        discriminator=gan.discriminator,
        gen_optimizer=gan.generator_optimizer,
        disc_optimizer=gan.discriminator_optimizer,
    )
    trainer.init_checkpoint(ckpt)
    trainer.init_tensorboard()
    trainer.set_train_step(gan.train_step)
    trainer.on_epoch_start = on_epoch_start
    trainer.on_step = on_step
    trainer.on_epoch_complete = on_epoch_complete
    trainer.run()
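
# Hedged sketch of the kind of statistics SpecGAN-style normalization
# typically needs: a per-frequency-bin mean and standard deviation over the
# training spectrograms. This is an illustration only; it is NOT verified
# against what calculate_dataset_stats actually computes.
def _demo_specgan_stats(melspecs):
    # melspecs: np.ndarray of shape [n_examples, time, mel_bins]
    return {
        'mean': np.mean(melspecs, axis=(0, 1)),
        'std': np.std(melspecs, axis=(0, 1)),
    }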