Exemplo n.º 1
0
import librosa
from wavenet_model import *
from audio_data import WavenetDataset
from wavenet_training import *

# Restore the newest snapshot found under 'snapshots', without CUDA.
model = load_latest_model_from('snapshots', use_cuda=False)

# Summarise the restored network.
print('model: ', model)
print('receptive field: ', model.receptive_field)
print('parameter count: ', model.parameter_count())

# Item length is the receptive field plus the output window, minus one.
data = WavenetDataset(
    dataset_file='vocaloid/dataset.npz',
    item_length=model.receptive_field + model.output_length - 1,
    target_length=model.output_length,
    file_location='vocaloid',
    test_stride=20,
)
print('the dataset has {} items'.format(len(data)))

# Seed for generation: first element of item 250000, reduced along dim 0 to
# the indices of the maxima (second value returned by torch.max).
start_data = torch.max(data[250000][0], 0)[1]


def prog_callback(step, total_steps):
    """Print generation progress as a floored percentage.

    Args:
        step: Number of steps completed so far.
        total_steps: Total number of steps; must be non-zero.
    """
    percent_done = 100 * step // total_steps
    print("{}% generated".format(percent_done))


generated = model.generate_fast(num_samples=16000 * 20,
                                 first_samples=start_data,
                                 progress_callback=prog_callback,
                                 progress_interval=1000,
                                 temperature=1.0,
Exemplo n.º 2
0
# Alternative ways of obtaining the model (disabled):
#model = load_latest_model_from('snapshots', use_cuda=True)
#model = torch.load('snapshots/some_model')

# Move the model to the GPU only when CUDA use was requested.
if use_cuda:
    print("move model to gpu")
    model.cuda()

# Summarise the network.
print('model: ', model)
print('receptive field: ', model.receptive_field)
print('parameter count: ', model.parameter_count())

# Item length is the receptive field plus the output window, minus one.
data = WavenetDataset(
    dataset_file=dataset_file,
    item_length=model.receptive_field + model.output_length - 1,
    target_length=model.output_length,
    file_location=dir_path,
    test_stride=500,
    sampling_rate=sr,
)
print('the dataset has {} items'.format(len(data)))


def generate_and_log_samples(step):
    """Generate audio from the latest snapshot and log it as an audio summary.

    The newest model in 'snapshots' is loaded without CUDA, used to generate
    audio at temperature 0.5, and the result is logged under the tag
    'temperature_0.5'.

    Args:
        step: Step value forwarded to ``logger.audio_summary``.
    """
    clip_length = 32000
    snapshot_model = load_latest_model_from('snapshots', use_cuda=False)
    print("start generating...")
    clip = generate_audio(
        snapshot_model,
        length=clip_length,
        temperatures=[0.5],
    )
    # The logger is given a float32 TensorFlow tensor.
    clip_tensor = tf.convert_to_tensor(clip, dtype=tf.float32)
    logger.audio_summary('temperature_0.5', clip_tensor, step, sr=16000)
Exemplo n.º 3
0
    ltype = torch.cuda.LongTensor

# Build an untrained WaveNet with the hyper-parameters below.
model = WaveNetModel(
    layers=8,
    blocks=4,
    dilation_channels=16,
    residual_channels=16,
    skip_channels=16,
    output_length=8,
    dtype=dtype,
)

# Alternative ways of obtaining the model (disabled):
#model = load_latest_model_from('snapshots')
#model = torch.load('snapshots/snapshot_2017-12-10_09-48-19')

# Item length is the receptive field plus the output window, minus one.
data = WavenetDataset(
    dataset_file='train_samples/saber/dataset.npz',
    item_length=model.receptive_field + model.output_length - 1,
    target_length=model.output_length,
    file_location='train_samples/saber',
    test_stride=20,
)

# torch.save(model, 'untrained_model')
print('the dataset has {} items'.format(len(data)))
print('model: ', model)
print('receptive field: ', model.receptive_field)
print('parameter count: ', model.parameter_count())


def generate_and_log_samples(step):
    sample_length = 4000
    gen_model = load_latest_model_from('snapshots')
    print("start generating...")
    samples = generate_audio(gen_model, length=sample_length, temperatures=[0])
Exemplo n.º 4
0
                     residual_channels=32,
                     skip_channels=1024,
                     end_channels=512,
                     output_length=16,
                     bias=True)

# Replace the model with the newest snapshot stored under 'snapshots'.
model = load_latest_model_from('snapshots')
#model = torch.load('snapshots/some_model')

# Summarise the network.
print('model: ', model)
print('receptive field: ', model.receptive_field)
print('parameter count: ', model.parameter_count())

# Item length is the receptive field plus the output window, minus one.
data = WavenetDataset(
    dataset_file=str(DATABASE_PATH),
    item_length=model.receptive_field + model.output_length - 1,
    target_length=model.output_length,
    file_location=str(TEST_FOLDER),
    test_stride=500,
)
print('the dataset has {} items'.format(len(data)))


def generate_and_log_samples(step):
    sample_length=32000
    gen_model = load_latest_model_from('snapshots')
    print("start generating...")
    samples = generate_audio(gen_model,
                             length=sample_length,
                             temperatures=[0.5])
    logger.audio_summary('temperature_0.5', samples, step, sr=16000)

    samples = generate_audio(gen_model,
                     bias=True)

# Alternative ways of obtaining the model (disabled):
#model = load_latest_model_from('snapshots', use_cuda=True)
#model = torch.load('snapshots/some_model')

# Move the model to the GPU only when CUDA use was requested.
if use_cuda:
    print("move model to gpu")
    model.cuda()

# Summarise the network.
print('model: ', model)
print('receptive field: ', model.receptive_field)
print('parameter count: ', model.parameter_count())

# Item length is the receptive field plus the output window, minus one.
data = WavenetDataset(
    dataset_file='train_samples/bach_chaconne/dataset.npz',
    item_length=model.receptive_field + model.output_length - 1,
    target_length=model.output_length,
    file_location='train_samples/bach_chaconne',
    test_stride=500,
)
print('the dataset has {} items'.format(len(data)))


def generate_and_log_samples(step):
    """Generate audio from the latest snapshot and log it as an audio summary.

    The newest model in 'snapshots' is loaded without CUDA, used to generate
    audio at temperature 0.5, and the result is logged under the tag
    'temperature_0.5'.

    Args:
        step: Step value forwarded to ``logger.audio_summary``.
    """
    clip_length = 32000
    snapshot_model = load_latest_model_from('snapshots', use_cuda=False)
    print("start generating...")
    clip = generate_audio(
        snapshot_model,
        length=clip_length,
        temperatures=[0.5],
    )
    # The logger is given a float32 TensorFlow tensor.
    clip_tensor = tf.convert_to_tensor(clip, dtype=tf.float32)
    logger.audio_summary('temperature_0.5', clip_tensor, step, sr=16000)
Exemplo n.º 6
0
# Training hyper-parameters.
epochs = 50
batch_size = 50
seq_len = 2000
target_len = 2000 // 40
out_classes = 256

lr = 0.0001

model = maskGRU(
    hidden_dim=hidden_dim,
    batch_size=batch_size,
    input_dim=input_dim,
    onehot_dim=256,
    out_classes=256,
    out_classes_tmp=300,
    embbed_dim=50,
)
data = WavenetDataset(
    dataset_file='mp3/jhs.npz',
    item_length=seq_len,
    target_length=target_len,
    test_stride=500,
)
print('the dataset has {} items'.format(len(data)))

# Print the current local wall-clock time (HH:MM:SS) as a start marker.
print('time:',
      time.strftime('%H:%M:%S', time.localtime(time.time())),
      'start training...')

# Switch to training mode and move the model to the GPU.
model.train()
model.cuda()

dataloader = torch.utils.data.DataLoader(
    data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=False,
)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# Alternative ways of obtaining the model (disabled):
#model = load_latest_model_from('snapshots', use_cuda=True)
#model = torch.load('snapshots/some_model')

# Move the model to the GPU only when CUDA use was requested.
if use_cuda:
    print("move model to gpu")
    model.cuda()

# Summarise the network.
print('model: ', model)
print('receptive field: ', model.receptive_field)
print('parameter count: ', model.parameter_count())

# Item length is the receptive field plus the output window, minus one.
data = WavenetDataset(
    dataset_file='/tmp/experiment/dataset.npz',
    item_length=model.receptive_field + model.output_length - 1,
    target_length=model.output_length,
    s3_bucket='bensandboxbucket',
    s3_folder='WavenetSampleGen/data',
    dataset_name='basic-jazz',
    test_stride=500,
)
print('the dataset has {} items'.format(len(data)))

# Trainer configured with the same S3 bucket as the dataset.
trainer = WavenetTrainer(
    model=model,
    dataset=data,
    s3_folder='WavenetSampleGen/',
    s3_bucket='bensandboxbucket',
    lr=0.0001,
    weight_decay=0.0,
    snapshot_interval=1000,
    dtype=dtype,
    ltype=ltype,
)
Exemplo n.º 8
0
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          iters_per_checkpoint, batch_size, seed, checkpoint_path):
    """Train a WaveNet model on 'data/dataset.npz', logging and checkpointing.

    Besides its arguments, the function reads the module-level names
    ``dist_config``, ``wavenet_config``, ``data_config`` and ``writer``.

    Args:
        num_gpus: When greater than 1, distributed initialisation and
            gradient all-reduce wrapping are applied.
        rank: Rank passed to ``init_distributed``; rank 0 creates the
            output directory.
        group_name: Group name passed to ``init_distributed``.
        output_directory: Directory into which checkpoints are written.
        epochs: Exclusive upper bound of the epoch loop.
        learning_rate: Learning rate for the Adam optimizer; also passed to
            ``save_checkpoint``.
        iters_per_checkpoint: Audio samples and a checkpoint are written
            whenever ``iteration`` is a multiple of this value.
        batch_size: Batch size of the training ``DataLoader``.
        seed: Seed for the torch CPU and CUDA random number generators.
        checkpoint_path: Checkpoint to resume from; an empty string means
            start from scratch. The name is re-bound inside the loop to the
            path of the checkpoint being saved.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = CrossEntropyLoss()
    # The model is explicitly placed on the CPU here.
    model = WaveNet(**wavenet_config).cpu()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    print(f"receptive_field: {model.receptive_field()}")
    # Each item spans the receptive field, 1000 extra samples and the output
    # window, minus one.
    trainset = WavenetDataset(
        dataset_file='data/dataset.npz',
        item_length=model.receptive_field() + 1000 + model.output_length - 1,
        target_length=model.output_length,
        file_location='data/',
        test_stride=500,
    )
    print(trainset._length)
    print('the dataset has ' + str(len(trainset)) + ' items')
    train_loader = DataLoader(
        trainset,
        batch_size=batch_size,
        shuffle=True,
        pin_memory=False,
    )

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    model.train()
    # Resume at the epoch that contains the restored iteration counter.
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    start = time.time()
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            model.zero_grad()
            y, target = batch
            y = to_gpu(y).float()
            target = to_gpu(target)
            y_pred = model((None, y))
            # Only the last `output_length` positions of the final axis are
            # scored against the target.
            loss = criterion(y_pred[:, :, -model.output_length:], target)
            loss.backward()
            optimizer.step()

            print("{}:\t{:.9f}".format(iteration, loss))
            # NOTE(review): total_iterations counts from epoch_offset, while
            # current_iteration counts from epoch 0 -- these disagree when
            # resuming with epoch_offset > 0; confirm what print_etr expects.
            print_etr(start,
                      total_iterations=(epochs - epoch_offset) *
                      len(train_loader),
                      current_iteration=epoch * len(train_loader) + i + 1)
            writer.add_scalar('Loss/train', loss, global_step=iteration)

            # NOTE(review): there is no rank check here, so every process
            # writes the wav files and the checkpoint -- confirm this is
            # intended when num_gpus > 1.
            if (iteration % iters_per_checkpoint == 0):
                # Take the first batch element's prediction, swap its two
                # axes, softmax along dim 1 and draw one sample per row.
                y_choice = y_pred[0].detach().cpu().transpose(0, 1)
                y_prob = F.softmax(y_choice, dim=1)
                y_prob_collapsed = torch.multinomial(y_prob,
                                                     num_samples=1).squeeze(1)
                y_pred_audio = mu_law_decode_numpy(y_prob_collapsed.numpy(),
                                                   model.n_out_channels)
                import torchaudio
                # NOTE(review): y.numpy() requires a CPU tensor; this presumably
                # relies on to_gpu leaving y on the CPU -- confirm.
                y_audio = mu_law_decode_numpy(y.numpy(), model.n_out_channels)
                torchaudio.save("test_in.wav", torch.tensor(y_audio), 16000)
                torchaudio.save("test_out.wav", torch.tensor(y_pred_audio),
                                16000)
                writer.add_audio('Audio',
                                 y_pred_audio,
                                 global_step=iteration,
                                 sample_rate=data_config['sampling_rate'])
                # Re-binds the `checkpoint_path` argument to the save path.
                checkpoint_path = "{}/wavenet_{}".format(
                    output_directory, iteration)
                save_checkpoint(model, optimizer, learning_rate, iteration,
                                checkpoint_path)

            writer.flush()
            iteration += 1
Exemplo n.º 9
0
                     bias=True)

# Alternative ways of obtaining the model (disabled):
#model = load_latest_model_from('snapshots', use_cuda=True)
#model = torch.load('snapshots/some_model')

# Move the model to the GPU only when CUDA use was requested.
if use_cuda:
    print("move model to gpu")
    model.cuda()

# Summarise the network.
print('model: ', model)
print('receptive field: ', model.receptive_field)
print('parameter count: ', model.parameter_count())

# Item length is the receptive field plus the output window, minus one.
data = WavenetDataset(
    dataset_file='/Users/max/repos/xenakis/data/808/808_test_data.npz',
    item_length=model.receptive_field + model.output_length - 1,
    target_length=model.output_length,
    file_location='/Users/max/repos/xenakis/data/808/808_test_data',
    test_stride=500,
)
print('the dataset has {} items'.format(len(data)))


def generate_and_log_samples(step):
    """Generate audio from the latest snapshot and log it as an audio summary.

    The newest model in 'snapshots' is loaded without CUDA, used to generate
    audio at temperature 0.5, and the result is logged under the tag
    'temperature_0.5'.

    Args:
        step: Step value forwarded to ``logger.audio_summary``.
    """
    clip_length = 32000
    snapshot_model = load_latest_model_from('snapshots', use_cuda=False)
    print("start generating...")
    clip = generate_audio(
        snapshot_model,
        length=clip_length,
        temperatures=[0.5],
    )
    # The logger is given a float32 TensorFlow tensor.
    clip_tensor = tf.convert_to_tensor(clip, dtype=tf.float32)
    logger.audio_summary('temperature_0.5', clip_tensor, step, sr=16000)
Exemplo n.º 10
0
    target = Variable(target.type(ltype)).squeeze()
    domain_index = Variable(domain_index.type(ltype))

    return (domain_index, x, target)


# For every input file, build one dataset and loader per domain.
for in_file in input_files:
    # Base name of the input file without its extension.
    filename = os.path.splitext(os.path.basename(in_file))[0]
    print(filename)

    for domain_index in range(len(DOMAINS)):
        # Important: this is a wavenet dataset for a single domain
        # Item and target lengths are both SR; the .npz path is derived from
        # GENRATION_BASE and the input file's base name.
        dataset = WavenetDataset(dataset_file=GENRATION_BASE + filename +
                                 '.npz',
                                 item_length=SR,
                                 target_length=SR,
                                 file_location=in_file,
                                 train=False,
                                 domain_index=domain_index,
                                 test_stride=1)

        # Unshuffled loader over the dataset.
        dataloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=BATCH_SIZE,
            shuffle=False,
            num_workers=4,  # num_workers=8,
            pin_memory=False)

        i = 0
        # NOTE(review): the dataset-derived `total` is immediately overwritten
        # by `16 // BATCH_SIZE` -- looks like a leftover debugging cap; confirm
        # which value is intended.
        total = len(dataset) // BATCH_SIZE
        total = 16 // BATCH_SIZE
        print(total, "samples")