def get_loader(data_path='data/vctk', max_seq_len=1000, batch_size=64, nspk=22):
    # Builds a loader over the validation split only; the second NpzFolder
    # argument flags single-speaker mode.
    dataset = NpzFolder(data_path + '/numpy_features_valid', nspk == 1)
    loader = NpzLoader(dataset,
                       max_seq_len=max_seq_len,
                       batch_size=batch_size,
                       num_workers=4,
                       pin_memory=True)
    return loader
def get_loaders(data_path='data/vctk', max_seq_len=1000, batch_size=64, nspk=22):
    # wrap train dataset
    train_dataset = NpzFolder(data_path + '/numpy_features', nspk == 1)
    train_loader = NpzLoader(train_dataset,
                             max_seq_len=max_seq_len,
                             batch_size=batch_size,
                             num_workers=4,
                             pin_memory=True,
                             shuffle=True)

    # wrap validation dataset
    valid_dataset = NpzFolder(data_path + '/numpy_features_valid', nspk == 1)
    valid_loader = NpzLoader(valid_dataset,
                             max_seq_len=max_seq_len,
                             batch_size=batch_size,
                             num_workers=4,
                             pin_memory=True)

    return train_loader, valid_loader
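# Hedged usage sketch (not part of the original scripts): how the loaders above
# might be consumed. The batch structure yielded by NpzLoader is assumed here to
# be ((txt, txt_lengths), (feat, feat_lengths), spkr), mirroring the
# make_a_batch tuples used in the TensorFlow port further down; only len() and
# plain iteration are relied on otherwise. _sanity_check_loaders is a
# hypothetical helper name.
def _sanity_check_loaders():
    train_loader, valid_loader = get_loaders(data_path='data/vctk',
                                             max_seq_len=1000,
                                             batch_size=64,
                                             nspk=22)
    print('train batches: %d, valid batches: %d' %
          (len(train_loader), len(valid_loader)))
    # Peek at one training batch to confirm shapes before launching a full run.
    for (txt, txt_lengths), (feat, feat_lengths), spkr in train_loader:
        print('txt', tuple(txt.size()),
              'feat', tuple(feat.size()),
              'spkr', tuple(spkr.size()))
        break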
def main():
    parser = argparse.ArgumentParser(description='PyTorch Loop')
    # Env options:
    parser.add_argument('--epochs', type=int, default=92, metavar='N',
                        help='number of epochs to train (default: 92)')
    parser.add_argument('--seed', type=int, default=10, metavar='S',
                        help='random seed (default: 10)')
    parser.add_argument('--expName', type=str, default='vctk', metavar='E',
                        help='Experiment name')
    parser.add_argument('--data', default='data/vctk', metavar='D', type=str,
                        help='Data path')
    parser.add_argument('--checkpoint', default='', metavar='C', type=str,
                        help='Checkpoint path')
    parser.add_argument('--gpu', default=0, metavar='G', type=int,
                        help='GPU device ID')
    # Data options
    parser.add_argument('--max-seq-len', type=int, default=1000,
                        help='Max sequence length for tbptt')
    parser.add_argument('--batch-size', type=int, default=64,
                        help='Batch size')
    # Model options
    parser.add_argument('--nspk', type=int, default=22,
                        help='Number of speakers')

    # init
    args = parser.parse_args()
    args.expName = os.path.join('checkpoints', args.expName)
    torch.cuda.set_device(args.gpu)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    logging = create_output_dir(args)

    # data
    valid_dataset = NpzFolder(args.data + '/numpy_features_valid', args.nspk == 1)
    valid_loader = NpzLoader(valid_dataset,
                             max_seq_len=args.max_seq_len,
                             batch_size=args.batch_size,
                             num_workers=4,
                             pin_memory=True)

    # load model
    model, norm = model_def(args.checkpoint, gpu=args.gpu, valid_loader=valid_loader)

    # Begin!
    eval_loss = evaluate(model, norm, valid_loader, logging)
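# A minimal entry point, assuming this evaluation script is invoked directly.
# The flags shown match the argparse options defined above; the script name and
# checkpoint path are only illustrative placeholders.
#
#   python evaluate.py --data data/vctk --checkpoint checkpoints/vctk/bestmodel.pth \
#       --gpu 0 --batch-size 64 --max-seq-len 1000 --nspk 22
if __name__ == '__main__':
    main()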
                        help='Memory number of segments')

    # init
    args = parser.parse_args()
    args.expName = os.path.join('checkpoints', args.expName)
    torch.cuda.set_device(args.gpu)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    logging = create_output_dir(args)
    vis = visdom.Visdom(env=args.expName)

    # data
    logging.info("Building dataset.")
    train_dataset = NpzFolder(args.data + '/numpy_features', args.nspk == 1)
    train_loader = NpzLoader(train_dataset,
                             max_seq_len=args.max_seq_len,
                             batch_size=args.batch_size,
                             num_workers=4,
                             pin_memory=True,
                             shuffle=True)
    valid_dataset = NpzFolder(args.data + '/numpy_features_valid', args.nspk == 1)
    valid_loader = NpzLoader(valid_dataset,
                             max_seq_len=args.max_seq_len,
                             batch_size=args.batch_size,
                             num_workers=4,
                             pin_memory=True)
    logging.info("Dataset ready!")
                        help='Memory number of segments')

    # init
    args = parser.parse_args()
    args.expName = os.path.join('checkpoints', args.expName)
    torch.cuda.set_device(args.gpu)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    logging = create_output_dir(args)
    vis = visdom.Visdom(env=args.expName)

    # data
    logging.info("Building dataset.")
    train_dataset = NpzFolder(args.data + '/numpy_features')
    train_loader = NpzLoader(train_dataset,
                             max_seq_len=args.max_seq_len,
                             batch_size=args.batch_size,
                             num_workers=4,
                             pin_memory=True,
                             shuffle=True)
    valid_dataset = NpzFolder(args.data + '/numpy_features_valid')
    valid_loader = NpzLoader(valid_dataset,
                             max_seq_len=args.max_seq_len,
                             batch_size=args.batch_size,
                             num_workers=4,
                             pin_memory=True)
    logging.info("Dataset ready!")
def main():
    # Load the trained weights and the training-time arguments saved next to them.
    weights = torch.load(args.checkpoint,
                         map_location=lambda storage, loc: storage)
    opt = torch.load(os.path.dirname(args.checkpoint) + '/args.pth')
    train_args = opt[0]

    train_dataset = NpzFolder(train_args.data + '/numpy_features')
    char2code = train_dataset.dict
    spkr2code = train_dataset.speakers
    norm_path = train_args.data + '/norm_info/norm.dat.npy'

    train_args.noise = 0
    model = Loop(train_args)
    model.load_state_dict(weights)
    if args.gpu >= 0:
        model.cuda()
    model.eval()

    if args.spkr not in range(len(spkr2code)):
        print('ERROR: Unknown speaker id: %d.' % args.spkr)
        return

    txt, feat, spkr, output_fname = None, None, None, None
    if args.npz != '':
        # Source phonemes and reference features come from an existing npz file.
        txt, feat = npy_loader_phonemes(args.npz)

        txt = Variable(txt.unsqueeze(1), volatile=True)
        feat = Variable(feat.unsqueeze(1), volatile=True)
        spkr = Variable(torch.LongTensor([args.spkr]), volatile=True)

        fname = os.path.basename(args.npz)[:-4]
        output_fname = fname + '.gen_' + str(args.spkr)
    elif args.text != '':
        # Source phonemes come from raw text; allocate a dummy feature buffer.
        txt = text2phone(args.text, char2code)
        # feat = torch.FloatTensor(500, 67)
        feat = torch.FloatTensor(1500, 67)
        spkr = torch.LongTensor([args.spkr])

        txt = Variable(txt.unsqueeze(1), volatile=True)
        feat = Variable(feat.unsqueeze(1), volatile=True)
        spkr = Variable(spkr, volatile=True)

        fname = args.text.replace(' ', '_')
        output_fname = fname + '.gen_' + str(args.spkr)
    else:
        print('ERROR: Must supply npz file path or text as source.')
        return

    if args.gpu >= 0:
        txt = txt.cuda()
        feat = feat.cuda()
        spkr = spkr.cuda()

    out, attn = model([txt, spkr], feat)
    out, attn = trim_pred(out, attn)

    output_dir = os.path.join(os.path.dirname(args.checkpoint), 'results')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    generate_merlin_wav(out.data.cpu().numpy(),
                        output_dir,
                        output_fname,
                        norm_path)

    if args.npz != '':
        # Also vocode the original features for a side-by-side comparison.
        output_orig_fname = os.path.basename(args.npz)[:-4] + '.orig'
        generate_merlin_wav(feat[:, 0, :].data.cpu().numpy(),
                            output_dir,
                            output_orig_fname,
                            norm_path)
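# Hedged usage note: the argument parser for this generation script is not shown
# above, so the flag spellings below are assumed from the attribute names that
# main() reads (args.checkpoint, args.spkr, args.npz, args.text, args.gpu), and
# the script name and paths are placeholders.
#
#   # generate from an existing feature file
#   python generate.py --checkpoint checkpoints/vctk/bestmodel.pth --spkr 3 \
#       --npz data/vctk/numpy_features_valid/some_utterance.npz
#
#   # generate directly from text
#   python generate.py --checkpoint checkpoints/vctk/bestmodel.pth --spkr 3 \
#       --text "hello world"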
def main():
    # load datasets
    train_dataset_path = os.path.join(args.data, 'numpy_features')
    train = NpzFolder(train_dataset_path)
    train.remove_too_long_seq(args.max_seq_len)
    train_loader = Dataset_Iter(train, batch_size=args.batch_size)
    train_loader.shuffle()

    valid_dataset_path = os.path.join(args.data, 'numpy_features_valid')
    valid = NpzFolder(valid_dataset_path)
    valid_loader = Dataset_Iter(valid, batch_size=args.batch_size)
    valid_loader.shuffle()
    # train_loader = Dataset_Iter(valid, batch_size=args.batch_size)

    # initiate tensorflow model
    input0 = tf.placeholder(tf.int64, [None, None])          # phoneme id sequences
    input1 = tf.placeholder(tf.float32, [None])              # contains length of sentence
    speaker = tf.placeholder(tf.int32, [None, 1])            # speaker identity
    target0 = tf.placeholder(tf.float32, [None, None, 63])   # acoustic feature targets
    target1 = tf.placeholder(tf.float32, [None])             # target sequence lengths (fed tgtLengths below)
    # idente = tf.placeholder(tf.float32, [None, 256])
    # s_t = tf.placeholder(tf.float32, [64, 319, 20])
    # mu_t = tf.placeholder(tf.float32, [64, 10])
    # context = tf.placeholder(tf.float32, [64, 64, 256])
    start = tf.placeholder(tf.bool, shape=(), name='start_new_batch')
    train_flag = tf.placeholder(tf.bool, shape=(), name='train_flag')
    # out_seq = tf.placeholder(tf.float32, [None, None, 63])
    # attns_seq = tf.placeholder(tf.float32, [None, None, 63])

    model = Loop(args)

    # Define loss and optimizer
    output, attns = model.forward(input0, speaker, target0, start, train_flag)
    loss_op = MaskedMSE(output, target0, target1)
    optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)
    train_op, clip_flag = gradient_check_and_clip(loss_op, optimizer,
                                                  args.clip_grad, args.ignore_grad)
    merged = tf.summary.merge_all()

    # Initialize the variables (i.e. assign their default value)
    init = tf.global_variables_initializer()

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver(global_variable_list)

    load_model = args.checkpoint != ''
    save_model = True
    best_eval = float('inf')
    sess_idx = 0
    train_losses = []
    valid_losses = []

    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter(
            "%s/%s/train" % (args.outpath, expName), sess.graph)
        valid_writer = tf.summary.FileWriter(
            "%s/%s/valid" % (args.outpath, expName), sess.graph)

        # Run the initializer
        sess.run(init)

        # Restore variables from disk.
        if load_model:
            saver.restore(sess, args.checkpoint)
            print("Model restored from file: %s" % args.checkpoint)

        for epoch in range(args.epochs):
            train_enum = tqdm(train_loader, desc='Train epoch %d' % epoch,
                              total=ceil_on_division(len(train_loader), args.batch_size))

            # Train data
            for batch_ind in train_enum:
                batch_loss_list = []
                (srcBatch, srcLengths), (tgtBatch, tgtLengths), full_spkr = \
                    make_a_batch(train_loader.dataset, batch_ind)
                batch_iter = TBPTTIter((srcBatch, srcLengths),
                                       (tgtBatch, tgtLengths),
                                       full_spkr, args.seq_len)

                for (srcBatch, srcLengths), (tgtBatch, tgtLengths), spkr, start2 in batch_iter:
                    loss, _, clip_flag1, summary = sess.run(
                        [loss_op, train_op, clip_flag, merged],
                        feed_dict={
                            input0: srcBatch,
                            speaker: spkr,
                            target0: tgtBatch,
                            target1: tgtLengths,
                            start: start2,
                            train_flag: True
                        })
                    train_writer.add_summary(summary, sess_idx)
                    sess_idx += 1
                    if not clip_flag1:
                        batch_loss_list.append(loss)
                    else:
                        # if too many '-' appear, there are exploding gradients
                        print('-')

                train_losses.append(batch_loss_list)
                if len(batch_loss_list) != 0:
                    batch_loss = sum(batch_loss_list) / len(batch_loss_list)
                    batch_loss_list.append(batch_loss)
                else:
                    batch_loss = -1.
                train_enum.set_description('Train (loss %.2f) epoch %d' %
                                           (batch_loss, epoch))
                train_enum.update(srcBatch.shape[0])

            # Validate data
            valid_enum = tqdm(valid_loader, desc='Validating epoch %d' % epoch,
                              total=ceil_on_division(len(valid_loader), args.batch_size))
            batch_loss_list = []
            for batch_ind in valid_enum:
                (srcBatch, srcLengths), (tgtBatch, tgtLengths), full_spkr = \
                    make_a_batch(valid_loader.dataset, batch_ind)
                loss, summary = sess.run(
                    [loss_op, merged],
                    feed_dict={
                        input0: srcBatch,
                        speaker: full_spkr,
                        target0: tgtBatch,
                        target1: tgtLengths,
                        start: True,
                        train_flag: False
                    })
                batch_loss_list.append(loss)
                valid_writer.add_summary(summary, sess_idx)
                sess_idx += 1
                valid_enum.set_description('Validating (loss %.2f) epoch %d' %
                                           (loss, epoch))

            if len(batch_loss_list) != 0:
                valid_losses.append(batch_loss_list)
                valid_loss = sum(batch_loss_list) / len(batch_loss_list)
            else:
                valid_loss = 99999.

            if valid_loss < best_eval and save_model:
                best_eval = valid_loss
                save_path = saver.save(sess, "%s/bestmodel.ckpt" % args.expName)
                print("NEW BEST MODEL!, model saved in file: %s" % save_path)
            print('Final validation loss for epoch %d is: %.2f' % (epoch, valid_loss))

            train_loader.shuffle()
            valid_loader.shuffle()

        if save_model:
            save_path = saver.save(sess, "%s/model.ckpt" % args.expName)
            print("Model saved in file: %s" % save_path)

        train_writer.close()
        valid_writer.close()
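# ceil_on_division is referenced by the tqdm totals above but not defined in this
# section; a minimal sketch under the assumption that it is plain ceiling
# division of the dataset size by the batch size:
def ceil_on_division(numerator, denominator):
    # e.g. ceil_on_division(10, 3) == 4
    return (numerator + denominator - 1) // denominator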