def get_loader(data_path='data/vctk',
               max_seq_len=1000,
               batch_size=64,
               nspk=22):
    dataset = NpzFolder(data_path + '/numpy_features_valid', nspk == 1)
    loader = NpzLoader(dataset,
                       max_seq_len=max_seq_len,
                       batch_size=batch_size,
                       num_workers=4,
                       pin_memory=True)
    return loader
def get_loaders(data_path='data/vctk',
                max_seq_len=1000,
                batch_size=64,
                nspk=22):
    # wrap train dataset
    train_dataset = NpzFolder(data_path + '/numpy_features', nspk == 1)
    train_loader = NpzLoader(train_dataset,
                             max_seq_len=max_seq_len,
                             batch_size=batch_size,
                             num_workers=4,
                             pin_memory=True,
                             shuffle=True)

    # wrap validation dataset
    valid_dataset = NpzFolder(data_path + '/numpy_features_valid', nspk == 1)
    valid_loader = NpzLoader(valid_dataset,
                             max_seq_len=max_seq_len,
                             batch_size=batch_size,
                             num_workers=4,
                             pin_memory=True)

    return train_loader, valid_loader
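A minimal, hypothetical usage sketch of get_loaders follows; it assumes NpzLoader behaves like a standard PyTorch DataLoader (so len() gives the number of batches) and only counts batches, since the exact batch layout produced by its collate function is not shown in this excerpt.

# Sketch only: build both loaders and report how many batches one epoch yields.
train_loader, valid_loader = get_loaders(data_path='data/vctk',
                                         max_seq_len=1000,
                                         batch_size=64,
                                         nspk=22)
print('train batches per epoch: %d' % len(train_loader))
print('valid batches per epoch: %d' % len(valid_loader))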
Example #3
def main():
    parser = argparse.ArgumentParser(description='PyTorch Loop')
    # Env options:
    parser.add_argument('--epochs', type=int, default=92, metavar='N',
                        help='number of epochs to train (default: 92)')
    parser.add_argument('--seed', type=int, default=10, metavar='S',
                        help='random seed (default: 10)')
    parser.add_argument('--expName', type=str, default='vctk', metavar='E',
                        help='Experiment name')
    parser.add_argument('--data', default='data/vctk',
                        metavar='D', type=str, help='Data path')
    parser.add_argument('--checkpoint', default='',
                        metavar='C', type=str, help='Checkpoint path')
    parser.add_argument('--gpu', default=0,
                        metavar='G', type=int, help='GPU device ID')
    # Data options
    parser.add_argument('--max-seq-len', type=int, default=1000,
                        help='Max sequence length for tbptt')
    parser.add_argument('--batch-size', type=int, default=64,
                        help='Batch size')
    # Model options
    parser.add_argument('--nspk', type=int, default=22,
                        help='Number of speakers')

    # init
    args = parser.parse_args()
    args.expName = os.path.join('checkpoints', args.expName)
    torch.cuda.set_device(args.gpu)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    logging = create_output_dir(args)

    # data
    valid_dataset = NpzFolder(args.data + '/numpy_features_valid', args.nspk == 1)
    valid_loader = NpzLoader(valid_dataset,
                             max_seq_len=args.max_seq_len,
                             batch_size=args.batch_size,
                             num_workers=4,
                             pin_memory=True)

    # load model
    model, norm = model_def(args.checkpoint, gpu=args.gpu, valid_loader=valid_loader)

    # Begin!
    eval_loss = evaluate(model, norm, valid_loader, logging)
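For context, here is a minimal sketch of what an evaluation pass over valid_loader could look like. It is not the repository's evaluate(): the batch layout ((txt, txt_len), (feat, feat_len), spkr) is assumed from the make_a_batch call in the TensorFlow example further down, the forward signature model([txt, spkr], feat) is taken from the generation example below, and criterion stands in for a masked-MSE loss.

# Sketch only; names other than model and valid_loader are assumptions.
def evaluate_sketch(model, criterion, valid_loader):
    model.eval()
    total_loss, n_batches = 0.0, 0
    for (txt, txt_len), (feat, feat_len), spkr in valid_loader:
        output, _ = model([txt, spkr], feat)      # forward pass
        loss = criterion(output, feat, feat_len)  # mask padded frames
        total_loss += loss.item()  # assumes PyTorch >= 0.4; older code used loss.data[0]
        n_batches += 1
    return total_loss / max(n_batches, 1)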
Example #4
                    help='Memory number of segments')


# init
args = parser.parse_args()
args.expName = os.path.join('checkpoints', args.expName)
torch.cuda.set_device(args.gpu)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)
logging = create_output_dir(args)
vis = visdom.Visdom(env=args.expName)


# data
logging.info("Building dataset.")
train_dataset = NpzFolder(args.data + '/numpy_features', args.nspk == 1)
train_loader = NpzLoader(train_dataset,
                         max_seq_len=args.max_seq_len,
                         batch_size=args.batch_size,
                         num_workers=4,
                         pin_memory=True,
                         shuffle=True)

valid_dataset = NpzFolder(args.data + '/numpy_features_valid', args.nspk == 1)
valid_loader = NpzLoader(valid_dataset,
                         max_seq_len=args.max_seq_len,
                         batch_size=args.batch_size,
                         num_workers=4,
                         pin_memory=True)

logging.info("Dataset ready!")
Example #5
                    help='Memory number of segments')


# init
args = parser.parse_args()
args.expName = os.path.join('checkpoints', args.expName)
torch.cuda.set_device(args.gpu)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)
logging = create_output_dir(args)
vis = visdom.Visdom(env=args.expName)


# data
logging.info("Building dataset.")
train_dataset = NpzFolder(args.data + '/numpy_features')
train_loader = NpzLoader(train_dataset,
                         max_seq_len=args.max_seq_len,
                         batch_size=args.batch_size,
                         num_workers=4,
                         pin_memory=True,
                         shuffle=True)

valid_dataset = NpzFolder(args.data + '/numpy_features_valid')
valid_loader = NpzLoader(valid_dataset,
                         max_seq_len=args.max_seq_len,
                         batch_size=args.batch_size,
                         num_workers=4,
                         pin_memory=True)

logging.info("Dataset ready!")
Example #6
def main():
    weights = torch.load(args.checkpoint,
                         map_location=lambda storage, loc: storage)
    opt = torch.load(os.path.dirname(args.checkpoint) + '/args.pth')
    train_args = opt[0]

    train_dataset = NpzFolder(train_args.data + '/numpy_features')
    char2code = train_dataset.dict
    spkr2code = train_dataset.speakers

    norm_path = train_args.data + '/norm_info/norm.dat.npy'
    train_args.noise = 0

    model = Loop(train_args)
    model.load_state_dict(weights)
    if args.gpu >= 0:
        model.cuda()
    model.eval()

    if args.spkr not in range(len(spkr2code)):
        print('ERROR: Unknown speaker id: %d.' % args.spkr)
        return

    txt, feat, spkr, output_fname = None, None, None, None
    if args.npz != '':
        txt, feat = npy_loader_phonemes(args.npz)

        txt = Variable(txt.unsqueeze(1), volatile=True)
        feat = Variable(feat.unsqueeze(1), volatile=True)
        spkr = Variable(torch.LongTensor([args.spkr]), volatile=True)

        fname = os.path.basename(args.npz)[:-4]
        output_fname = fname + '.gen_' + str(args.spkr)
    elif args.text != '':
        txt = text2phone(args.text, char2code)
        #feat = torch.FloatTensor(500, 67)
        feat = torch.FloatTensor(1500, 67)
        spkr = torch.LongTensor([args.spkr])

        txt = Variable(txt.unsqueeze(1), volatile=True)
        feat = Variable(feat.unsqueeze(1), volatile=True)
        spkr = Variable(spkr, volatile=True)

        fname = args.text.replace(' ', '_')
        output_fname = fname + '.gen_' + str(args.spkr)
    else:
        print('ERROR: Must supply npz file path or text as source.')
        return

    if args.gpu >= 0:
        txt = txt.cuda()
        feat = feat.cuda()
        spkr = spkr.cuda()

    out, attn = model([txt, spkr], feat)
    out, attn = trim_pred(out, attn)

    output_dir = os.path.join(os.path.dirname(args.checkpoint), 'results')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    generate_merlin_wav(out.data.cpu().numpy(), output_dir, output_fname,
                        norm_path)

    if args.npz != '':
        output_orig_fname = os.path.basename(args.npz)[:-4] + '.orig'
        generate_merlin_wav(feat[:, 0, :].data.cpu().numpy(), output_dir,
                            output_orig_fname, norm_path)
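Note that the Variable(..., volatile=True) pattern above comes from pre-0.4 PyTorch. On current PyTorch the same inference call would simply be wrapped in torch.no_grad(), roughly as sketched below (not part of the original script):

# Sketch of the equivalent inference call on PyTorch >= 0.4, where
# Variables and the volatile flag are gone and tensors are used directly.
with torch.no_grad():
    out, attn = model([txt, spkr], feat)
    out, attn = trim_pred(out, attn)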
Example #7
def main():
    # load datasets
    train_dataset_path = os.path.join(args.data, 'numpy_features')
    train = NpzFolder(train_dataset_path)
    train.remove_too_long_seq(args.max_seq_len)
    train_loader = Dataset_Iter(train, batch_size=args.batch_size)
    train_loader.shuffle()

    valid_dataset_path = os.path.join(args.data, 'numpy_features_valid')
    valid = NpzFolder(valid_dataset_path)
    valid_loader = Dataset_Iter(valid, batch_size=args.batch_size)
    valid_loader.shuffle()

    # train_loader = Dataset_Iter(valid, batch_size=args.batch_size)

    # initiate tensorflow model
    input0 = tf.placeholder(tf.int64, [None, None])
    input1 = tf.placeholder(tf.float32, [None])  # contains length of sentence
    speaker = tf.placeholder(tf.int32, [None, 1])  # speaker identity
    target0 = tf.placeholder(tf.float32, [None, None, 63])
    target1 = tf.placeholder(tf.float32, [None])  # target sequence lengths (valid frames per utterance)
    # idente  = tf.placeholder(tf.float32, [None,256])
    # s_t = tf.placeholder(tf.float32, [64,319,20])
    # mu_t = tf.placeholder(tf.float32, [64,10])
    # context  = tf.placeholder(tf.float32, [64,64,256])
    start = tf.placeholder(tf.bool, shape=(), name='start_new_batch')
    train_flag = tf.placeholder(tf.bool, shape=(), name='train_flag')
    # out_seq = tf.placeholder(tf.float32, [None, None, 63])
    # attns_seq = tf.placeholder(tf.float32, [None, None, 63])

    model = Loop(args)

    # Define loss and optimizer
    output, attns = model.forward(input0, speaker, target0, start, train_flag)
    loss_op = MaskedMSE(output, target0, target1)
    optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)
    train_op, clip_flag = gradient_check_and_clip(loss_op, optimizer,
                                                  args.clip_grad,
                                                  args.ignore_grad)
    merged = tf.summary.merge_all()

    # Initialize the variables (i.e. assign their default value)
    init = tf.global_variables_initializer()

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver(global_variable_list)
    load_model = args.checkpoint != ''
    save_model = True
    best_eval = float('inf')
    sess_idx = 0
    train_losses = []
    valid_losses = []
    with tf.Session() as sess:
        # Run the initializer

        train_writer = tf.summary.FileWriter(
            "%s/%s/train" % (args.outpath, expName), sess.graph)
        valid_writer = tf.summary.FileWriter(
            "%s/%s/valid" % (args.outpath, expName), sess.graph)

        # Restore variables from disk.
        sess.run(init)
        if load_model:
            saver.restore(sess, args.checkpoint)
            print("Model restored from file: %s" % args.checkpoint)

        for epoch in range(args.epochs):
            train_enum = tqdm(train_loader,
                              desc='Train epoch %d' % epoch,
                              total=ceil_on_division(len(train_loader),
                                                     args.batch_size))
            # Train data
            for batch_ind in train_enum:
                batch_loss_list = []
                (srcBatch, srcLengths), (tgtBatch, tgtLengths), full_spkr = \
                    make_a_batch(train_loader.dataset, batch_ind)
                batch_iter = TBPTTIter((srcBatch, srcLengths),
                                       (tgtBatch, tgtLengths), full_spkr,
                                       args.seq_len)
                for (srcBatch, srcLengths), (tgtBatch, tgtLengths), \
                        spkr, start2 in batch_iter:
                    loss, _, clip_flag1, summary = sess.run(
                        [loss_op, train_op, clip_flag, merged],
                        feed_dict={
                            input0: srcBatch,
                            speaker: spkr,
                            target0: tgtBatch,
                            target1: tgtLengths,
                            start: start2,
                            train_flag: True
                        })
                    train_writer.add_summary(summary, sess_idx)
                    sess_idx += 1
                    if not clip_flag1:
                        batch_loss_list.append(loss)
                    else:
                        # A run of '-' marks indicates exploding gradients
                        # whose updates are being skipped.
                        print('-')
                train_losses.append(batch_loss_list)
                if len(batch_loss_list) != 0:
                    batch_loss = sum(batch_loss_list) / len(batch_loss_list)
                    batch_loss_list.append(batch_loss)
                else:
                    batch_loss = -1.
                train_enum.set_description('Train (loss %.2f) epoch %d' %
                                           (batch_loss, epoch))
                train_enum.update(srcBatch.shape[0])

            # Validate data
            valid_enum = tqdm(valid_loader,
                              desc='Validating epoch %d' % epoch,
                              total=ceil_on_division(len(valid_loader),
                                                     args.batch_size))
            batch_loss_list = []
            for batch_ind in valid_enum:
                (srcBatch, srcLengths), (tgtBatch, tgtLengths), full_spkr = \
                    make_a_batch(valid_loader.dataset, batch_ind)

                loss, summary = sess.run(
                    [loss_op, merged],
                    feed_dict={
                        input0: srcBatch,
                        speaker: full_spkr,
                        target0: tgtBatch,
                        target1: tgtLengths,
                        start: True,
                        train_flag: False
                    })
                batch_loss_list.append(loss)
                valid_writer.add_summary(summary, sess_idx)
                sess_idx += 1
                valid_enum.set_description('Validating (loss %.2f) epoch %d' %
                                           (loss, epoch))
            if len(batch_loss_list) != 0:
                valid_losses.append(batch_loss_list)
                valid_loss = sum(batch_loss_list) / len(batch_loss_list)
            else:
                valid_loss = 99999.
            if valid_loss < best_eval and save_model:
                best_eval = valid_loss
                save_path = saver.save(sess,
                                       "%s/bestmodel.ckpt" % args.expName)
                print("NEW BEST MODEL!, model saved in file: %s" % save_path)
            print('Final validation loss for epoch %d is: %.2f' %
                  (epoch, valid_loss))
            train_loader.shuffle()
            valid_loader.shuffle()

        if save_model:
            save_path = saver.save(sess, "%s/model.ckpt" % args.expName)
            print("Model saved in file: %s" % save_path)

        train_writer.close()
        valid_writer.close()
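The last example refers to several helpers that are not defined in the excerpt. Below is a minimal sketch of two of them, ceil_on_division and a masked MSE criterion, under the assumptions that output/target tensors are batch-major [batch, time, features] and that target1 carries the number of valid frames per utterance; the repository's actual implementations may differ.

import tensorflow as tf


def ceil_on_division(n, d):
    # Ceiling integer division, e.g. ceil_on_division(10, 3) == 4.
    return (n + d - 1) // d


def MaskedMSE(output, target, target_lengths):
    # Mean squared error over valid (unpadded) frames only.
    max_len = tf.shape(target)[1]
    mask = tf.sequence_mask(tf.cast(target_lengths, tf.int32),
                            maxlen=max_len, dtype=tf.float32)   # [B, T]
    mask = tf.expand_dims(mask, -1)                             # [B, T, 1]
    sq_err = tf.square(output - target) * mask
    n_valid = tf.reduce_sum(mask) * tf.cast(tf.shape(target)[2], tf.float32)
    return tf.reduce_sum(sq_err) / tf.maximum(n_valid, 1.0)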