from time import time
from utils import make_batch
from models import WaveNet, Generator
from IPython.display import Audio

# Build a single training batch from the example recording.
# NOTE(review): assumes inputs has shape (1, num_time_samples, ...) — confirm
# against utils.make_batch.
inputs, targets = make_batch('./voice.wav')
num_time_samples = inputs.shape[1]
num_channels = 1
gpu_fraction = 1

model = WaveNet(
    num_time_samples=num_time_samples,
    num_channels=num_channels,
    gpu_fraction=gpu_fraction)

# Audio playback widget for the input clip (only rendered inside a notebook).
Audio(inputs.reshape(inputs.shape[1]), rate=44100)

# Time a full training run on the single batch.
t_start = time()
model.train(inputs, targets)
t_end = time()
print('Training time = {} seconds'.format(t_end - t_start))

# generator = Generator(model)
# input_ = inputs[:, 0:1, 0]
# tic = time()
# predictions = generator.run(input_, 32000)
# toc = time()
# print('Generating time = {} seconds'.format(toc-tic))
def train(args):
    """Train a WaveNet model until interrupted, evaluating and checkpointing.

    Runs an endless training loop over the training DataLoader. Every 1000
    iterations the model is evaluated on the validation set and timings are
    logged; every 10000 iterations the model and optimizer states are saved
    to ``<workspace>/models/dataset=<dataset>/<filename>/condition=<condition>``.

    Args:
        args: parsed command-line namespace providing ``dataset``,
            ``dataset_dir``, ``workspace``, ``filename``, ``batch_size``,
            ``condition`` and ``cuda``.

    Note: this function never returns; stop it externally (e.g. Ctrl-C).
    """
    # Arguments & parameters
    dataset = args.dataset
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    filename = args.filename
    # Use an audio clip as a mini-batch. Must be 1 if audio clips
    # have different lengths.
    batch_size = args.batch_size
    condition = args.condition
    cuda = args.cuda

    dilations = config.dilations

    # Paths
    models_dir = os.path.join(workspace, 'models',
                              'dataset={}'.format(dataset), filename,
                              'condition={}'.format(condition))
    create_folder(models_dir)

    # Data generator
    Dataset = get_dataset(dataset)
    train_dataset = Dataset(dataset_dir, data_type='train')
    validate_dataset = Dataset(dataset_dir, data_type='validate')

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True,
        num_workers=1, pin_memory=True)

    validate_loader = torch.utils.data.DataLoader(
        validate_dataset, batch_size=batch_size, shuffle=True,
        num_workers=1, pin_memory=True)

    # Model
    model = WaveNet(
        dilations,
        residual_channels=config.residual_channels,
        dilation_channels=config.dilation_channels,
        skip_channels=config.skip_channels,
        quantize_bins=config.quantize_bins,
        global_condition_channels=config.global_condition_channels,
        global_condition_cardinality=Dataset.global_condition_cardinality,
        use_cuda=cuda)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0.)

    train_bgn_time = time.time()
    iteration = 0

    while True:
        for (batch_x, global_condition) in train_loader:
            '''batch_x: (batch_size, seq_len)
            global_condition: (batch_size,)
            '''
            print('iteration: {}, input size: {}'.format(
                iteration, batch_x.shape))

            # Evaluate (also runs once at iteration 0, before any training)
            if iteration % 1000 == 0:
                train_fin_time = time.time()
                evaluate_bgn_time = time.time()

                loss = evaluate(model, validate_loader, condition, cuda)

                print('-----------------')
                logging.info(
                    'iteration: {}, loss: {:.3f}, train_time: {:.3f}, '
                    'validate time: {:.3f} s'.format(
                        iteration, loss, train_fin_time - train_bgn_time,
                        time.time() - evaluate_bgn_time))

                train_bgn_time = time.time()

            # Save model
            if iteration % 10000 == 0:
                save_out_dict = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                }
                save_out_path = os.path.join(
                    models_dir, 'md_{}_iters.tar'.format(iteration))
                torch.save(save_out_dict, save_out_path)
                logging.info('Save model to {}'.format(save_out_path))

            # Move data to GPU
            if condition:
                global_condition = move_data_to_gpu(global_condition, cuda)
            else:
                global_condition = None

            batch_x = move_data_to_gpu(batch_x, cuda)

            # Prepare input and target data. The target is the tail of the
            # sequence the model can actually predict after its receptive
            # field is consumed.
            batch_input = batch_x[:, 0:-1]
            output_width = batch_input.shape[-1] - model.receptive_field + 1
            batch_target = batch_x[:, -output_width:]

            # Forward
            model.train()
            batch_output = model(batch_input, global_condition)

            loss = _loss_func(batch_output, batch_target)

            # Backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # loss.item() instead of loss.data.cpu().numpy(): .data bypasses
            # autograd tracking and is deprecated for reading scalar losses.
            print('loss: {:.3f}'.format(loss.item()))

            iteration += 1