import librosa from wavenet_model import * from audio_data import WavenetDataset from wavenet_training import * model = load_latest_model_from('snapshots', use_cuda=False) print('model: ', model) print('receptive field: ', model.receptive_field) print('parameter count: ', model.parameter_count()) data = WavenetDataset(dataset_file='vocaloid/dataset.npz', item_length=model.receptive_field + model.output_length - 1, target_length=model.output_length, file_location='vocaloid', test_stride=20) print('the dataset has ' + str(len(data)) + ' items') start_data = data[250000][0] start_data = torch.max(start_data, 0)[1] def prog_callback(step, total_steps): print(str(100 * step // total_steps) + "% generated") generated = model.generate_fast(num_samples=16000 * 20, first_samples=start_data, progress_callback=prog_callback, progress_interval=1000, temperature=1.0,
#model = load_latest_model_from('snapshots', use_cuda=True) #model = torch.load('snapshots/some_model') if use_cuda: print("move model to gpu") model.cuda() print('model: ', model) print('receptive field: ', model.receptive_field) print('parameter count: ', model.parameter_count()) data = WavenetDataset(dataset_file=dataset_file, item_length=model.receptive_field + model.output_length - 1, target_length=model.output_length, file_location=dir_path, test_stride=500, sampling_rate=sr) print('the dataset has ' + str(len(data)) + ' items') def generate_and_log_samples(step): sample_length = 32000 gen_model = load_latest_model_from('snapshots', use_cuda=False) print("start generating...") samples = generate_audio(gen_model, length=sample_length, temperatures=[0.5]) tf_samples = tf.convert_to_tensor(samples, dtype=tf.float32) logger.audio_summary('temperature_0.5', tf_samples, step, sr=16000)
ltype = torch.cuda.LongTensor model = WaveNetModel(layers=8, blocks=4, dilation_channels=16, residual_channels=16, skip_channels=16, output_length=8, dtype=dtype) #model = load_latest_model_from('snapshots') #model = torch.load('snapshots/snapshot_2017-12-10_09-48-19') data = WavenetDataset(dataset_file='train_samples/saber/dataset.npz', item_length=model.receptive_field + model.output_length - 1, target_length=model.output_length, file_location='train_samples/saber', test_stride=20) # torch.save(model, 'untrained_model') print('the dataset has ' + str(len(data)) + ' items') print('model: ', model) print('receptive field: ', model.receptive_field) print('parameter count: ', model.parameter_count()) def generate_and_log_samples(step): sample_length = 4000 gen_model = load_latest_model_from('snapshots') print("start generating...") samples = generate_audio(gen_model, length=sample_length, temperatures=[0])
residual_channels=32, skip_channels=1024, end_channels=512, output_length=16, bias=True) model = load_latest_model_from('snapshots') #model = torch.load('snapshots/some_model') print('model: ', model) print('receptive field: ', model.receptive_field) print('parameter count: ', model.parameter_count()) data = WavenetDataset(dataset_file=str(DATABASE_PATH), item_length=model.receptive_field + model.output_length - 1, target_length=model.output_length, file_location=str(TEST_FOLDER), test_stride=500) print('the dataset has ' + str(len(data)) + ' items') def generate_and_log_samples(step): sample_length=32000 gen_model = load_latest_model_from('snapshots') print("start generating...") samples = generate_audio(gen_model, length=sample_length, temperatures=[0.5]) logger.audio_summary('temperature_0.5', samples, step, sr=16000) samples = generate_audio(gen_model,
bias=True) #model = load_latest_model_from('snapshots', use_cuda=True) #model = torch.load('snapshots/some_model') if use_cuda: print("move model to gpu") model.cuda() print('model: ', model) print('receptive field: ', model.receptive_field) print('parameter count: ', model.parameter_count()) data = WavenetDataset(dataset_file='train_samples/bach_chaconne/dataset.npz', item_length=model.receptive_field + model.output_length - 1, target_length=model.output_length, file_location='train_samples/bach_chaconne', test_stride=500) print('the dataset has ' + str(len(data)) + ' items') def generate_and_log_samples(step): sample_length=32000 gen_model = load_latest_model_from('snapshots', use_cuda=False) print("start generating...") samples = generate_audio(gen_model, length=sample_length, temperatures=[0.5]) tf_samples = tf.convert_to_tensor(samples, dtype=tf.float32) logger.audio_summary('temperature_0.5', tf_samples, step, sr=16000)
# ---- Hyper-parameters -------------------------------------------------------
epochs = 50
batch_size = 50
seq_len = 2000
target_len = seq_len // 40  # 2000 // 40 == 50 target samples per item
out_classes = 256
lr = 0.0001

# ---- Model and data ---------------------------------------------------------
# `hidden_dim` and `input_dim` come from earlier in the script.
model = maskGRU(
    hidden_dim=hidden_dim,
    batch_size=batch_size,
    input_dim=input_dim,
    onehot_dim=256,
    out_classes=out_classes,
    out_classes_tmp=300,
    embbed_dim=50,
)

data = WavenetDataset(
    dataset_file='mp3/jhs.npz',
    item_length=seq_len,
    target_length=target_len,
    test_stride=500,
)
print('the dataset has ' + str(len(data)) + ' items')

# Wall-clock HH:MM:SS stamp marking the start of training.
print('time:', time.strftime('%H:%M:%S', time.localtime()), 'start training...')

# ---- Training objects -------------------------------------------------------
model.train()
model.cuda()

dataloader = torch.utils.data.DataLoader(
    dataset=data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=False,
)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
#model = load_latest_model_from('snapshots', use_cuda=True) #model = torch.load('snapshots/some_model') if use_cuda: print("move model to gpu") model.cuda() print('model: ', model) print('receptive field: ', model.receptive_field) print('parameter count: ', model.parameter_count()) data = WavenetDataset(dataset_file='/tmp/experiment/dataset.npz', item_length=model.receptive_field + model.output_length - 1, target_length=model.output_length, s3_bucket='bensandboxbucket', s3_folder='WavenetSampleGen/data', dataset_name='basic-jazz', test_stride=500) print('the dataset has ' + str(len(data)) + ' items') trainer = WavenetTrainer(model=model, dataset=data, s3_folder='WavenetSampleGen/', s3_bucket='bensandboxbucket', lr=0.0001, weight_decay=0.0, snapshot_interval=1000, dtype=dtype, ltype=ltype)
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          iters_per_checkpoint, batch_size, seed, checkpoint_path):
    """Train a WaveNet model on ``data/dataset.npz`` with periodic checkpoints.

    Relies on module-level names that are not visible in this chunk:
    ``wavenet_config``, ``dist_config``, ``data_config``, ``writer`` and the
    helpers ``init_distributed``, ``apply_gradient_allreduce``,
    ``load_checkpoint``, ``save_checkpoint``, ``to_gpu``, ``print_etr`` and
    ``mu_law_decode_numpy``.

    Args:
        num_gpus: Number of processes/GPUs; values above 1 enable the
            distributed setup and gradient all-reduce wrapper.
        rank: Rank of this process; rank 0 prepares ``output_directory``.
        group_name: Passed through to ``init_distributed``.
        output_directory: Directory that receives ``wavenet_<iteration>``
            checkpoints.
        epochs: Epoch index at which training stops (exclusive).
        learning_rate: Adam learning rate; also stored in checkpoints.
        iters_per_checkpoint: Emit audio samples and a checkpoint whenever
            ``iteration`` is a multiple of this value.
        batch_size: Batch size of the training ``DataLoader``.
        seed: Seed applied to the CPU and CUDA torch RNGs.
        checkpoint_path: Checkpoint to resume from, or ``""`` to start fresh.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END: ADDED FOR DISTRIBUTED======

    criterion = CrossEntropyLoss()
    # NOTE(review): the model is kept on the CPU while batches go through
    # `to_gpu` below - confirm both end up on the same device.
    model = WaveNet(**wavenet_config).cpu()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END: ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    print(f"receptive_field: {model.receptive_field()}")
    trainset = WavenetDataset(
        dataset_file='data/dataset.npz',
        item_length=model.receptive_field() + 1000 + model.output_length - 1,
        target_length=model.output_length,
        file_location='data/',
        test_stride=500,
    )
    print(trainset._length)
    print('the dataset has ' + str(len(trainset)) + ' items')
    train_loader = DataLoader(
        trainset,
        batch_size=batch_size,
        shuffle=True,
        pin_memory=False,
    )
    steps_per_epoch = len(train_loader)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    model.train()
    # When resuming, skip the epochs already covered by the restored counter.
    epoch_offset = max(0, iteration // steps_per_epoch)
    total_iterations = (epochs - epoch_offset) * steps_per_epoch

    # ================ MAIN TRAINING LOOP! ===================
    start = time.time()
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            y, target = batch
            y = to_gpu(y).float()
            target = to_gpu(target)

            y_pred = model((None, y))
            # Only the trailing `output_length` predictions have targets.
            loss = criterion(y_pred[:, :, -model.output_length:], target)
            loss.backward()
            optimizer.step()

            loss_value = loss.item()
            print("{}:\t{:.9f}".format(iteration, loss_value))
            # Progress is counted from the first epoch of *this* run so that
            # it is on the same scale as `total_iterations` after a resume.
            print_etr(start,
                      total_iterations=total_iterations,
                      current_iteration=(epoch - epoch_offset) * steps_per_epoch + i + 1)
            writer.add_scalar('Loss/train', loss_value, global_step=iteration)

            if iteration % iters_per_checkpoint == 0:
                import torchaudio

                # Sample one class per time step from the first batch item.
                y_choice = y_pred[0].detach().cpu().transpose(0, 1)
                y_prob = F.softmax(y_choice, dim=1)
                y_prob_collapsed = torch.multinomial(y_prob,
                                                     num_samples=1).squeeze(1)
                y_pred_audio = mu_law_decode_numpy(y_prob_collapsed.numpy(),
                                                   model.n_out_channels)

                # `.numpy()` only works on CPU tensors, so move the input
                # back first (a no-op when it already lives on the CPU).
                y_audio = mu_law_decode_numpy(y.detach().cpu().numpy(),
                                              model.n_out_channels)
                # NOTE(review): files are written at a fixed 16000 Hz while
                # the summary below uses data_config['sampling_rate'] -
                # confirm the two agree.
                torchaudio.save("test_in.wav", torch.tensor(y_audio), 16000)
                torchaudio.save("test_out.wav", torch.tensor(y_pred_audio),
                                16000)
                writer.add_audio('Audio', y_pred_audio, global_step=iteration,
                                 sample_rate=data_config['sampling_rate'])

                # Separate local name so the `checkpoint_path` argument is
                # not overwritten.
                # NOTE(review): every rank writes this file; confirm that is
                # intended when num_gpus > 1.
                snapshot_path = "{}/wavenet_{}".format(output_directory,
                                                       iteration)
                save_checkpoint(model, optimizer, learning_rate, iteration,
                                snapshot_path)

            writer.flush()
            iteration += 1
bias=True) #model = load_latest_model_from('snapshots', use_cuda=True) #model = torch.load('snapshots/some_model') if use_cuda: print("move model to gpu") model.cuda() print('model: ', model) print('receptive field: ', model.receptive_field) print('parameter count: ', model.parameter_count()) data = WavenetDataset(dataset_file='/Users/max/repos/xenakis/data/808/808_test_data.npz', item_length=model.receptive_field + model.output_length - 1, target_length=model.output_length, file_location='/Users/max/repos/xenakis/data/808/808_test_data', test_stride=500) print('the dataset has ' + str(len(data)) + ' items') def generate_and_log_samples(step): sample_length=32000 gen_model = load_latest_model_from('snapshots', use_cuda=False) print("start generating...") samples = generate_audio(gen_model, length=sample_length, temperatures=[0.5]) tf_samples = tf.convert_to_tensor(samples, dtype=tf.float32) logger.audio_summary('temperature_0.5', tf_samples, step, sr=16000)
target = Variable(target.type(ltype)).squeeze() domain_index = Variable(domain_index.type(ltype)) return (domain_index, x, target) for in_file in input_files: filename = os.path.splitext(os.path.basename(in_file))[0] print(filename) for domain_index in range(len(DOMAINS)): # Important: this is a wavenet dataset for a single domain dataset = WavenetDataset(dataset_file=GENRATION_BASE + filename + '.npz', item_length=SR, target_length=SR, file_location=in_file, train=False, domain_index=domain_index, test_stride=1) dataloader = torch.utils.data.DataLoader( dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, # num_workers=8, pin_memory=False) i = 0 total = len(dataset) // BATCH_SIZE total = 16 // BATCH_SIZE print(total, "samples")