Example #1
def main(argv):
    args = utils.parse_args("Train a wavenet model")
    utils.redirect_log_to_file(args.model_dir)

    hparams = create_hparams(args.model_dir, args.configs, initialize=True)
    utils.check_git_hash(args.model_dir)

    # Prepare data
    data.load_vocab(hparams)
    train_input_fn = data.InputPipeline(hparams, tf.estimator.ModeKeys.TRAIN)
    eval_input_fn = data.InputPipeline(hparams, tf.estimator.ModeKeys.EVAL)

    # Training
    train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                        max_steps=hparams.train_steps)
    eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn,
                                      steps=hparams.eval_steps,
                                      throttle_secs=hparams.throttle_secs)

    distribution = tf.contrib.distribute.MirroredStrategy()
    run_config = tf.estimator.RunConfig(
        model_dir=args.model_dir,
        train_distribute=distribution,
        save_summary_steps=hparams.save_summary_steps,
        save_checkpoints_secs=hparams.save_checkpoints_secs,
        keep_checkpoint_max=hparams.n_checkpoints)
    estimator = tf.estimator.Estimator(model_fn=model.build_model_fn(hparams),
                                       config=run_config,
                                       model_dir=args.model_dir)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example #2
def main():
  parser = utils.get_argument_parser("Decode by using the trained model")
  parser.add_argument("--checkpoint", dest="checkpoint", help="Path to a checkpoint file. Default is the latest version.")
  parser.add_argument("--limit", type=int, default=0, help="The number of sentences to be decoded. (default=unlimited)")
  parser.add_argument("--use_eval", action="store_true", help="Use evaluation dataset for prediction")
  parser.add_argument("--predict_file", type=str, default="", help="Path to a text file to be translated")
  parser.add_argument("--out_file", type=str, default="", help="Path to a text file to write")
  args = parser.parse_args()

  hparams = create_hparams(args.model_dir, args.configs, initialize=False)
  utils.check_git_hash(args.model_dir)

  data.load_vocab(hparams)
  if args.use_eval:
    pipeline = data.InputPipeline(hparams.source_eval_file, None, None, tf.estimator.ModeKeys.PREDICT, hparams)
  else:
    pipeline = data.InputPipeline(args.predict_file, None, None, tf.estimator.ModeKeys.PREDICT, hparams)

  estimator = tf.estimator.Estimator(model_fn=model.build_model_fn(hparams), model_dir=args.model_dir)

  # set a file path to write
  if args.out_file != "":
    f = open(args.out_file, 'w')
  else:
    f = None

  for i, prediction in enumerate(estimator.predict(pipeline, checkpoint_path=args.checkpoint)):
    if args.limit and i == args.limit:
      break
    token_ids = prediction.tolist()
    print(hparams.target_vocab.decode(token_ids), file=f)
    if i % 1000 == 0:
      tf.logging.info("write: %d", i)
Example #3
def train_and_eval(rank, n_gpus, hps):
  global global_step
  if rank == 0:
    logger = utils.get_logger(hps.model_dir)
    logger.info(hps)
    utils.check_git_hash(hps.model_dir)
    writer = SummaryWriter(log_dir=hps.model_dir)
    writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))

  dist.init_process_group(backend='nccl', init_method='env://', world_size=n_gpus, rank=rank)
  torch.manual_seed(hps.train.seed)
  torch.cuda.set_device(rank)

  train_dataset = TextMelLoader(hps.data.training_files, hps.data)
  train_sampler = torch.utils.data.distributed.DistributedSampler(
      train_dataset,
      num_replicas=n_gpus,
      rank=rank,
      shuffle=True)
  collate_fn = TextMelCollate(1)
  train_loader = DataLoader(train_dataset, num_workers=8, shuffle=False,
      batch_size=hps.train.batch_size, pin_memory=True,
      drop_last=True, collate_fn=collate_fn, sampler=train_sampler)
  if rank == 0:
    val_dataset = TextMelLoader(hps.data.validation_files, hps.data)
    val_loader = DataLoader(val_dataset, num_workers=8, shuffle=False,
        batch_size=hps.train.batch_size, pin_memory=True,
        drop_last=True, collate_fn=collate_fn)

  generator = models.FlowGenerator(
      n_vocab=len(symbols), 
      out_channels=hps.data.n_mel_channels, 
      **hps.model).cuda(rank)
  optimizer_g = commons.Adam(generator.parameters(), scheduler=hps.train.scheduler,
      dim_model=hps.model.hidden_channels, warmup_steps=hps.train.warmup_steps,
      lr=hps.train.learning_rate, betas=hps.train.betas, eps=hps.train.eps)
  if hps.train.fp16_run:
    generator, optimizer_g._optim = amp.initialize(generator, optimizer_g._optim, opt_level="O1")
  generator = DDP(generator)
  epoch_str = 1
  global_step = 0
  try:
    _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "G_*.pth"), generator, optimizer_g)
    epoch_str += 1
    optimizer_g.step_num = (epoch_str - 1) * len(train_loader)
    optimizer_g._update_learning_rate()
    global_step = (epoch_str - 1) * len(train_loader)
  except Exception:
    # No saved checkpoint was found; fall back to the DDI initialization if available.
    if hps.train.ddi and os.path.isfile(os.path.join(hps.model_dir, "ddi_G.pth")):
      _ = utils.load_checkpoint(os.path.join(hps.model_dir, "ddi_G.pth"), generator, optimizer_g)
  
  for epoch in range(epoch_str, hps.train.epochs + 1):
    if rank==0:
      train(rank, epoch, hps, generator, optimizer_g, train_loader, logger, writer)
      evaluate(rank, epoch, hps, generator, optimizer_g, val_loader, logger, writer_eval)
      if epoch%50 == 0:
        utils.save_checkpoint(generator, optimizer_g, hps.train.learning_rate, epoch, os.path.join(hps.model_dir, "G_{}.pth".format(epoch)))
    else:
      train(rank, epoch, hps, generator, optimizer_g, train_loader, None, None)
Example #4
def main():
    parser = utils.get_argument_parser("Decode by using the trained model")
    parser.add_argument(
        "--checkpoint",
        dest="checkpoint",
        help="Path to a checkpoint file. Default is the latest version.")
    parser.add_argument(
        "--limit",
        type=int,
        default=1,
        help="The number of sentences to be decoded. (0=unlimited)")
    parser.add_argument("--use_eval",
                        action="store_true",
                        help="Use evaluation dataset for prediction")
    parser.add_argument("--predict_dir",
                        type=str,
                        default="",
                        help="Path to a local condition file dir")
    parser.add_argument("--out_dir",
                        type=str,
                        default="",
                        help="Path to a wav file dir to write")
    args = parser.parse_args()

    hparams = create_hparams(args.model_dir, args.configs, initialize=False)
    utils.check_git_hash(args.model_dir)

    if not os.path.isdir(args.out_dir):
        os.makedirs(args.out_dir)

    data.load_vocab(hparams)
    if args.use_eval:
        input_fn = data.InputPipeline(hparams, tf.estimator.ModeKeys.EVAL)
    else:
        raise NotImplementedError("File to mel or wav is not available now.")

    estimator = tf.estimator.Estimator(model_fn=model.build_model_fn(hparams),
                                       model_dir=args.model_dir)

    for i, prediction in enumerate(
            estimator.predict(input_fn, checkpoint_path=args.checkpoint)):
        for j, wav in enumerate(prediction):
            wav = wav.astype(np.float32)
            write(
                os.path.join(args.out_dir,
                             "{}.wav".format(i * hparams.infer_batch_size + j)),
                hparams.sample_rate, wav)

        if args.limit and i + 1 == args.limit:
            break
Example #5
def main():
    hps = utils.get_hparams()
    logger = utils.get_logger(hps.model_dir)
    logger.info(hps)
    utils.check_git_hash(hps.model_dir)

    torch.manual_seed(hps.train.seed)

    train_dataset = TextMelLoader(hps.data.training_files, hps.data)
    collate_fn = TextMelCollate(1)
    train_loader = DataLoader(train_dataset,
                              num_workers=8,
                              shuffle=True,
                              batch_size=hps.train.batch_size,
                              pin_memory=True,
                              drop_last=True,
                              collate_fn=collate_fn)

    generator = FlowGenerator_DDI(speaker_dim=hps.model.speaker_embedding,
                                  n_vocab=len(symbols),
                                  out_channels=hps.data.n_mel_channels,
                                  **hps.model).cuda()
    optimizer_g = commons.Adam(generator.parameters(),
                               scheduler=hps.train.scheduler,
                               dim_model=hps.model.hidden_channels,
                               warmup_steps=hps.train.warmup_steps,
                               lr=hps.train.learning_rate,
                               betas=hps.train.betas,
                               eps=hps.train.eps)

    generator.train()
    for batch_idx, (x, x_lengths, y, y_lengths,
                    speaker_embedding) in enumerate(train_loader):
        x, x_lengths = x.cuda(), x_lengths.cuda()
        y, y_lengths = y.cuda(), y_lengths.cuda()
        speaker_embedding = speaker_embedding.cuda()

        _ = generator(x, x_lengths, speaker_embedding, y, y_lengths, gen=False)
        break

    utils.save_checkpoint(generator, optimizer_g, hps.train.learning_rate, 0,
                          os.path.join(hps.model_dir, "ddi_G.pth"))
Example #6
def main(argv):
    args = utils.parse_args("Train a transformer model")
    utils.redirect_log_to_file(args.model_dir)

    hparams = create_hparams(args.model_dir, args.configs, initialize=True)
    utils.check_git_hash(args.model_dir)

    # Prepare data
    data.load_vocab(hparams)

    train_input_fn = data.InputPipeline(None, None, hparams.record_train_file,
                                        tf.estimator.ModeKeys.TRAIN, hparams)
    eval_input_fn = data.InputPipeline(None, None, hparams.record_eval_file,
                                       tf.estimator.ModeKeys.EVAL, hparams)

    # Training
    log_samples_hook = tf.train.LoggingTensorHook(
        ['targets', 'predictions'],
        at_end=True,
        formatter=tensors_to_string(hparams))

    train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                        max_steps=hparams.train_steps)
    eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn,
                                      steps=hparams.eval_steps,
                                      hooks=[log_samples_hook])

    distribution = tf.contrib.distribute.MirroredStrategy()
    run_config = tf.estimator.RunConfig(
        model_dir=args.model_dir,
        train_distribute=distribution,
        save_summary_steps=hparams.save_summary_steps,
        save_checkpoints_steps=hparams.save_checkpoints_steps,
        keep_checkpoint_max=hparams.n_checkpoints)
    estimator = tf.estimator.Estimator(model_fn=model.build_model_fn(hparams),
                                       config=run_config,
                                       model_dir=args.model_dir)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example #7
def main():
  hps = utils.get_hparams()
  logger = utils.get_logger(hps.model_dir)
  logger.info(hps)
  utils.check_git_hash(hps.model_dir)

  torch.manual_seed(hps.train.seed)

  train_dataset = TextMelLoader(hps.data.training_files, hps.data)
  collate_fn = TextMelCollate(1)
  train_loader = DataLoader(train_dataset, num_workers=8, shuffle=True,
      batch_size=hps.train.batch_size, pin_memory=True,
      drop_last=True, collate_fn=collate_fn)

  generator = FlowGenerator_DDI(
      len(symbols), 
      out_channels=hps.data.n_mel_channels,
      **hps.model).cuda()
  optimizer_g = commons.Adam(generator.parameters(), scheduler=hps.train.scheduler,
      dim_model=hps.model.hidden_channels, warmup_steps=hps.train.warmup_steps,
      lr=hps.train.learning_rate, betas=hps.train.betas, eps=hps.train.eps)
   
  generator.train()
  for batch_idx, (x, x_lengths, y, y_lengths) in enumerate(train_loader):
    x, x_lengths = x.cuda(), x_lengths.cuda()
    y, y_lengths = y.cuda(), y_lengths.cuda()

    _ = generator(x, x_lengths, y, y_lengths, gen=False)
    break

  # check for a pretrained checkpoint and load it without an optimizer
  pretrained_checkpoint_path = os.path.join(hps.model_dir, "pretrained.pth")
  if os.path.isfile(pretrained_checkpoint_path):
    logger.info("Loading pretrained checkpoint: %s" % pretrained_checkpoint_path)
    model, optimizer, learning_rate, iteration = utils.load_checkpoint(pretrained_checkpoint_path, generator)
    utils.save_checkpoint(model, optimizer_g, hps.train.learning_rate, 0, os.path.join(hps.model_dir, "ddi_G.pth"))
  else:
    utils.save_checkpoint(generator, optimizer_g, hps.train.learning_rate, 0, os.path.join(hps.model_dir, "ddi_G.pth"))
Example #8
import json
import argparse
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
from data import LJspeechDataset, collate_fn, collate_fn_synthesize

import models
import commons
import utils

hps = utils.get_hparams()
logger = utils.get_logger(hps.model_dir)
logger.info(hps)
utils.check_git_hash(hps.model_dir)

use_cuda = hps.train.use_cuda and torch.cuda.is_available()
torch.manual_seed(hps.train.seed)
device = torch.device("cuda" if use_cuda else "cpu")

kwargs = {'num_workers': 8, 'pin_memory': True} if use_cuda else {}
train_dataset = LJspeechDataset(hps.data.data_path, True, 0.1)
test_dataset = LJspeechDataset(hps.data.data_path, False, 0.1)
train_loader = DataLoader(train_dataset,
                          batch_size=hps.train.batch_size,
                          shuffle=True,
                          collate_fn=collate_fn,
                          **kwargs)
test_loader = DataLoader(test_dataset,
                         batch_size=hps.train.batch_size,
                         shuffle=False,
                         collate_fn=collate_fn,
                         **kwargs)
Example #9
def run(rank, n_gpus, hps):
  global global_step
  if rank == 0:
    logger = utils.get_logger(hps.model_dir)
    logger.info(hps)
    utils.check_git_hash(hps.model_dir)
    writer = SummaryWriter(log_dir=hps.model_dir)
    writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))

  dist.init_process_group(backend='nccl', init_method='env://', world_size=n_gpus, rank=rank)
  torch.manual_seed(hps.train.seed)
  torch.cuda.set_device(rank)

  train_dataset = AudioSpecLoader(hps.data.training_files, hps.data)
  train_sampler = DistributedBucketSampler(
      train_dataset,
      hps.train.batch_size,
      [32,300,400,500,600,700,800,900,1000],
      num_replicas=n_gpus,
      rank=rank,
      shuffle=True)
  collate_fn = AudioSpecCollate()
  train_loader = DataLoader(train_dataset, num_workers=8, shuffle=False, pin_memory=True,
      collate_fn=collate_fn, batch_sampler=train_sampler)
  if rank == 0:
    eval_dataset = AudioSpecLoader(hps.data.validation_files, hps.data)
    eval_loader = DataLoader(eval_dataset, num_workers=8, shuffle=False,
        batch_size=hps.train.batch_size, pin_memory=True,
        drop_last=False, collate_fn=collate_fn)

  net_g = SynthesizerTrn(
      hps.data.filter_length // 2 + 1,
      hps.train.segment_size // hps.data.hop_length,
      n_speakers=hps.data.n_speakers,
      **hps.model).cuda(rank)
  net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank)
  optim_g = torch.optim.AdamW(
      net_g.parameters(), 
      hps.train.learning_rate, 
      betas=hps.train.betas, 
      eps=hps.train.eps)
  optim_d = torch.optim.AdamW(
      net_d.parameters(),
      hps.train.learning_rate, 
      betas=hps.train.betas, 
      eps=hps.train.eps)
  net_g = DDP(net_g, find_unused_parameters=True, device_ids=[rank])
  net_d = DDP(net_d, device_ids=[rank])

  try:
    _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "G_*.pth"), net_g, optim_g)
    _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d, optim_d)
    global_step = (epoch_str - 1) * len(train_loader)
  except Exception:
    # No saved checkpoint was found; start training from scratch.
    epoch_str = 1
    global_step = 0

  scheduler_g = torch.optim.lr_scheduler.ExponentialLR(optim_g, gamma=hps.train.lr_decay, last_epoch=epoch_str-2)
  scheduler_d = torch.optim.lr_scheduler.ExponentialLR(optim_d, gamma=hps.train.lr_decay, last_epoch=epoch_str-2)

  scaler = GradScaler(enabled=hps.train.fp16_run)

  for epoch in range(epoch_str, hps.train.epochs + 1):
    if rank==0:
      train_and_evaluate(rank, epoch, hps, [net_g, net_d], [optim_g, optim_d], [scheduler_g, scheduler_d], scaler, [train_loader, eval_loader], logger, [writer, writer_eval])
    else:
      train_and_evaluate(rank, epoch, hps, [net_g, net_d], [optim_g, optim_d], [scheduler_g, scheduler_d], scaler, [train_loader, None], None, None)
    scheduler_g.step()
    scheduler_d.step()
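
A note on the shared helper: every example on this page calls utils.check_git_hash(model_dir) before training or decoding. The implementation of that helper is not included here; purely as an illustration, a function of this kind could store the current commit hash inside the model directory and warn when a later run uses different code. The sketch below is an assumption, not the actual utils module: the log messages and the "githash" file name are made up for the example.

import logging
import os
import subprocess

def check_git_hash(model_dir):
  # Illustrative sketch only: remember which commit a model_dir was created from
  # and warn if the working tree has moved to a different commit since then.
  source_dir = os.path.dirname(os.path.realpath(__file__))
  if not os.path.exists(os.path.join(source_dir, ".git")):
    logging.warning("%s is not a git repository; skipping hash check.", source_dir)
    return

  cur_hash = subprocess.getoutput("git rev-parse HEAD")
  hash_path = os.path.join(model_dir, "githash")
  if os.path.exists(hash_path):
    with open(hash_path) as f:
      saved_hash = f.read().strip()
    if saved_hash != cur_hash:
      logging.warning("git hash differs from the one saved in %s: %s (saved) vs %s (current)",
                      hash_path, saved_hash[:8], cur_hash[:8])
  else:
    with open(hash_path, "w") as f:
      f.write(cur_hash)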