Example #1
    def test_train_step():
        input_dummy = torch.randint(0, 24, (8, 128)).long().to(device)
        input_lengths = torch.randint(100, 129, (8, )).long().to(device)
        input_lengths[-1] = 128
        mel_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device)
        mel_lengths = torch.randint(20, 30, (8, )).long().to(device)
        speaker_ids = torch.randint(0, 5, (8, )).long().to(device)

        criterion = GlowTTSLoss()

        # model to train
        config = GlowTTSConfig(num_chars=32)
        model = GlowTTS(config).to(device)

        # reference model to compare model weights
        model_ref = GlowTTS(config).to(device)

        model.train()
        print(" > Num parameters for GlowTTS model:%s" %
              (count_parameters(model)))

        # pass the state to ref model
        model_ref.load_state_dict(copy.deepcopy(model.state_dict()))

        count = 0
        for param, param_ref in zip(model.parameters(),
                                    model_ref.parameters()):
            assert (param - param_ref).sum() == 0, param
            count += 1

        optimizer = optim.Adam(model.parameters(), lr=0.001)
        for _ in range(5):
            optimizer.zero_grad()
            outputs = model.forward(input_dummy, input_lengths, mel_spec,
                                    mel_lengths, None)
            loss_dict = criterion(
                outputs["z"],
                outputs["y_mean"],
                outputs["y_log_scale"],
                outputs["logdet"],
                mel_lengths,
                outputs["durations_log"],
                outputs["total_durations_log"],
                input_lengths,
            )
            loss = loss_dict["loss"]
            loss.backward()
            optimizer.step()

        # check parameter changes
        count = 0
        for param, param_ref in zip(model.parameters(),
                                    model_ref.parameters()):
            assert (param != param_ref).any(
            ), "param {} with shape {} not updated!! \n{}\n{}".format(
                count, param.shape, param, param_ref)
            count += 1
Example #2
    def test_train_step(self):
        batch_size = BATCH_SIZE
        input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids = self._create_inputs(
            batch_size)
        criterion = GlowTTSLoss()
        # model to train
        config = GlowTTSConfig(num_chars=32)
        model = GlowTTS(config).to(device)
        # reference model to compare model weights
        model_ref = GlowTTS(config).to(device)
        model.train()
        print(" > Num parameters for GlowTTS model:%s" %
              (count_parameters(model)))
        # pass the state to ref model
        model_ref.load_state_dict(copy.deepcopy(model.state_dict()))
        count = 0
        for param, param_ref in zip(model.parameters(),
                                    model_ref.parameters()):
            assert (param - param_ref).sum() == 0, param
            count += 1
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        for _ in range(5):
            optimizer.zero_grad()
            outputs = model.forward(input_dummy, input_lengths, mel_spec,
                                    mel_lengths, None)
            loss_dict = criterion(
                outputs["z"],
                outputs["y_mean"],
                outputs["y_log_scale"],
                outputs["logdet"],
                mel_lengths,
                outputs["durations_log"],
                outputs["total_durations_log"],
                input_lengths,
            )
            loss = loss_dict["loss"]
            loss.backward()
            optimizer.step()
        # check parameter changes
        self._check_parameter_changes(model, model_ref)
Example #3
def main(args):  # pylint: disable=redefined-outer-name
    # pylint: disable=global-variable-undefined
    global meta_data_train, meta_data_eval, symbols, phonemes, model_characters, speaker_mapping
    # Audio processor
    ap = AudioProcessor(**c.audio)
    if 'characters' in c.keys():
        symbols, phonemes = make_symbols(**c.characters)

    # DISTRIBUTED
    if num_gpus > 1:
        init_distributed(args.rank, num_gpus, args.group_id,
                         c.distributed["backend"], c.distributed["url"])

    # set model characters
    model_characters = phonemes if c.use_phonemes else symbols
    num_chars = len(model_characters)

    # load data instances
    meta_data_train, meta_data_eval = load_meta_data(c.datasets)

    # set the portion of the data used for training
    if 'train_portion' in c.keys():
        meta_data_train = meta_data_train[:int(
            len(meta_data_train) * c.train_portion)]
    if 'eval_portion' in c.keys():
        meta_data_eval = meta_data_eval[:int(
            len(meta_data_eval) * c.eval_portion)]

    # parse speakers
    num_speakers, speaker_embedding_dim, speaker_mapping = parse_speakers(
        c, args, meta_data_train, OUT_PATH)

    # setup model
    model = setup_model(num_chars,
                        num_speakers,
                        c,
                        speaker_embedding_dim=speaker_embedding_dim)
    optimizer = RAdam(model.parameters(),
                      lr=c.lr,
                      weight_decay=0,
                      betas=(0.9, 0.98),
                      eps=1e-9)
    criterion = GlowTTSLoss()

    if args.restore_path:
        print(f" > Restoring from {os.path.basename(args.restore_path)} ...")
        checkpoint = torch.load(args.restore_path, map_location='cpu')
        try:
            # TODO: fix optimizer init, model.cuda() needs to be called before
            # optimizer restore
            optimizer.load_state_dict(checkpoint['optimizer'])
            if c.reinit_layers:
                raise RuntimeError
            model.load_state_dict(checkpoint['model'])
        except:  #pylint: disable=bare-except
            print(" > Partial model initialization.")
            model_dict = model.state_dict()
            model_dict = set_init_dict(model_dict, checkpoint['model'], c)
            model.load_state_dict(model_dict)
            del model_dict

        for group in optimizer.param_groups:
            group['initial_lr'] = c.lr
        print(f" > Model restored from step {checkpoint['step']:d}",
              flush=True)
        args.restore_step = checkpoint['step']
    else:
        args.restore_step = 0

    if use_cuda:
        model.cuda()
        criterion.cuda()

    # DISTRIBUTED
    if num_gpus > 1:
        model = DDP_th(model, device_ids=[args.rank])

    if c.noam_schedule:
        scheduler = NoamLR(optimizer,
                           warmup_steps=c.warmup_steps,
                           last_epoch=args.restore_step - 1)
    else:
        scheduler = None

    num_params = count_parameters(model)
    print("\n > Model has {} parameters".format(num_params), flush=True)

    if args.restore_step == 0 or not args.best_path:
        best_loss = float('inf')
        print(" > Starting with inf best loss.")
    else:
        print(" > Restoring best loss from "
              f"{os.path.basename(args.best_path)} ...")
        best_loss = torch.load(args.best_path,
                               map_location='cpu')['model_loss']
        print(f" > Starting with loaded last best loss {best_loss}.")
    keep_all_best = c.get('keep_all_best', False)
    keep_after = c.get('keep_after', 10000)  # void if keep_all_best False

    # define dataloaders
    train_loader = setup_loader(ap, 1, is_val=False, verbose=True)
    eval_loader = setup_loader(ap, 1, is_val=True, verbose=True)

    global_step = args.restore_step
    model = data_depended_init(train_loader, model)
    for epoch in range(0, c.epochs):
        c_logger.print_epoch_start(epoch, c.epochs)
        train_avg_loss_dict, global_step = train(train_loader, model,
                                                 criterion, optimizer,
                                                 scheduler, ap, global_step,
                                                 epoch)
        eval_avg_loss_dict = evaluate(eval_loader, model, criterion, ap,
                                      global_step, epoch)
        c_logger.print_epoch_end(epoch, eval_avg_loss_dict)
        target_loss = train_avg_loss_dict['avg_loss']
        if c.run_eval:
            target_loss = eval_avg_loss_dict['avg_loss']
        best_loss = save_best_model(target_loss,
                                    best_loss,
                                    model,
                                    optimizer,
                                    global_step,
                                    epoch,
                                    c.r,
                                    OUT_PATH,
                                    model_characters,
                                    keep_all_best=keep_all_best,
                                    keep_after=keep_after)
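
Example #3 drives the optimizer through NoamLR(optimizer, warmup_steps=c.warmup_steps, ...). For reference, below is a minimal sketch of the standard Noam schedule (linear warmup, then inverse-square-root decay) written with torch.optim.lr_scheduler.LambdaLR; it is an assumption that TTS's NoamLR follows this exact formula, and the stand-in parameter and warmup value are hypothetical.

import torch
from torch import optim

# Hypothetical stand-in parameter; in Example #3 the optimizer wraps the GlowTTS model parameters.
params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = optim.Adam(params, lr=1e-3)

warmup_steps = 4000  # stands in for c.warmup_steps


def noam_factor(step: int) -> float:
    # Linear warmup followed by step ** -0.5 decay; the factor peaks at 1.0
    # when step == warmup_steps, so the configured lr acts as the peak lr.
    step = max(step, 1)
    return warmup_steps ** 0.5 * min(step * warmup_steps ** -1.5, step ** -0.5)


scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=noam_factor)
for _ in range(5):
    optimizer.step()
    scheduler.step()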
Example #4
    def test_train_step():
        input_dummy = torch.randint(0, 24, (8, 128)).long().to(device)
        input_lengths = torch.randint(100, 129, (8, )).long().to(device)
        input_lengths[-1] = 128
        mel_spec = torch.rand(8, c.audio['num_mels'], 30).to(device)
        linear_spec = torch.rand(8, 30, c.audio['fft_size']).to(device)
        mel_lengths = torch.randint(20, 30, (8, )).long().to(device)
        speaker_ids = torch.randint(0, 5, (8, )).long().to(device)

        criterion = GlowTTSLoss()

        # model to train
        model = GlowTts(
            num_chars=32,
            hidden_channels_enc=48,
            hidden_channels_dec=48,
            hidden_channels_dp=32,
            out_channels=80,
            encoder_type='rel_pos_transformer',
            encoder_params={
                'kernel_size': 3,
                'dropout_p': 0.1,
                'num_layers': 6,
                'num_heads': 2,
                'hidden_channels_ffn': 16,  # typically 4x hidden_channels
                'input_length': None
            },
            use_encoder_prenet=True,
            num_flow_blocks_dec=12,
            kernel_size_dec=5,
            dilation_rate=5,
            num_block_layers=4,
            dropout_p_dec=0.,
            num_speakers=0,
            c_in_channels=0,
            num_splits=4,
            num_squeeze=1,
            sigmoid_scale=False,
            mean_only=False).to(device)

        # reference model to compare model weights
        model_ref = GlowTts(
            num_chars=32,
            hidden_channels_enc=48,
            hidden_channels_dec=48,
            hidden_channels_dp=32,
            out_channels=80,
            encoder_type='rel_pos_transformer',
            encoder_params={
                'kernel_size': 3,
                'dropout_p': 0.1,
                'num_layers': 6,
                'num_heads': 2,
                'hidden_channels_ffn': 16,  # typically 4x hidden_channels
                'input_length': None
            },
            use_encoder_prenet=True,
            num_flow_blocks_dec=12,
            kernel_size_dec=5,
            dilation_rate=5,
            num_block_layers=4,
            dropout_p_dec=0.,
            num_speakers=0,
            c_in_channels=0,
            num_splits=4,
            num_squeeze=1,
            sigmoid_scale=False,
            mean_only=False).to(device)

        model.train()
        print(" > Num parameters for GlowTTS model:%s" %
              (count_parameters(model)))

        # pass the state to ref model
        model_ref.load_state_dict(copy.deepcopy(model.state_dict()))

        count = 0
        for param, param_ref in zip(model.parameters(),
                                    model_ref.parameters()):
            assert (param - param_ref).sum() == 0, param
            count += 1

        optimizer = optim.Adam(model.parameters(), lr=c.lr)
        for _ in range(5):
            z, logdet, y_mean, y_log_scale, alignments, o_dur_log, o_total_dur = model.forward(
                input_dummy, input_lengths, mel_spec, mel_lengths, None)
            optimizer.zero_grad()
            loss_dict = criterion(z, y_mean, y_log_scale, logdet, mel_lengths,
                                  o_dur_log, o_total_dur, input_lengths)
            loss = loss_dict['loss']
            loss.backward()
            optimizer.step()

        # check parameter changes
        count = 0
        for param, param_ref in zip(model.parameters(),
                                    model_ref.parameters()):
            assert (param != param_ref).any(
            ), "param {} with shape {} not updated!! \n{}\n{}".format(
                count, param.shape, param, param_ref)
            count += 1
Example #5
    def get_criterion(self):
        from TTS.tts.layers.losses import GlowTTSLoss  # pylint: disable=import-outside-toplevel

        return GlowTTSLoss()
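
This is the same GlowTTSLoss that the training steps in Examples #1, #2 and #4 call directly. A minimal sketch of the call site, assuming the output keys returned by GlowTTS.forward as shown in Example #1; the per-argument comments are interpretive:

criterion = GlowTTSLoss()
outputs = model.forward(input_dummy, input_lengths, mel_spec, mel_lengths, None)
loss_dict = criterion(
    outputs["z"],                    # flow latents from the mel decoder
    outputs["y_mean"],               # prior mean predicted from text
    outputs["y_log_scale"],          # prior log-scale predicted from text
    outputs["logdet"],               # log-determinant of the flow Jacobian
    mel_lengths,
    outputs["durations_log"],        # predicted log durations
    outputs["total_durations_log"],  # durations extracted from the alignment
    input_lengths,
)
loss_dict["loss"].backward()  # aggregate of the likelihood and duration terms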
Example #6
def main(args):  # pylint: disable=redefined-outer-name
    # pylint: disable=global-variable-undefined
    global meta_data_train, meta_data_eval, symbols, phonemes
    # Audio processor
    ap = AudioProcessor(**c.audio)
    if 'characters' in c.keys():
        symbols, phonemes = make_symbols(**c.characters)

    # DISTRIBUTED
    if num_gpus > 1:
        init_distributed(args.rank, num_gpus, args.group_id,
                         c.distributed["backend"], c.distributed["url"])
    num_chars = len(phonemes) if c.use_phonemes else len(symbols)

    # load data instances
    meta_data_train, meta_data_eval = load_meta_data(c.datasets)

    # set the portion of the data used for training
    if 'train_portion' in c.keys():
        meta_data_train = meta_data_train[:int(len(meta_data_train) * c.train_portion)]
    if 'eval_portion' in c.keys():
        meta_data_eval = meta_data_eval[:int(len(meta_data_eval) * c.eval_portion)]

    # parse speakers
    if c.use_speaker_embedding:
        speakers = get_speakers(meta_data_train)
        if args.restore_path:
            prev_out_path = os.path.dirname(args.restore_path)
            speaker_mapping = load_speaker_mapping(prev_out_path)
            assert all([speaker in speaker_mapping
                        for speaker in speakers]), "As of now, you cannot " \
                                                   "introduce new speakers to " \
                                                   "a previously trained model."
        else:
            speaker_mapping = {name: i for i, name in enumerate(speakers)}
        save_speaker_mapping(OUT_PATH, speaker_mapping)
        num_speakers = len(speaker_mapping)
        print("Training with {} speakers: {}".format(num_speakers,
                                                     ", ".join(speakers)))
    else:
        num_speakers = 0

    # setup model
    model = setup_model(num_chars, num_speakers, c)
    optimizer = RAdam(model.parameters(), lr=c.lr, weight_decay=0, betas=(0.9, 0.98), eps=1e-9)
    criterion = GlowTTSLoss()

    if c.apex_amp_level:
        # pylint: disable=import-outside-toplevel
        from apex import amp
        from apex.parallel import DistributedDataParallel as DDP
        model.cuda()
        model, optimizer = amp.initialize(model, optimizer, opt_level=c.apex_amp_level)
    else:
        amp = None

    if args.restore_path:
        checkpoint = torch.load(args.restore_path, map_location='cpu')
        try:
            # TODO: fix optimizer init, model.cuda() needs to be called before
            # optimizer restore
            optimizer.load_state_dict(checkpoint['optimizer'])
            if c.reinit_layers:
                raise RuntimeError
            model.load_state_dict(checkpoint['model'])
        except: #pylint: disable=bare-except
            print(" > Partial model initialization.")
            model_dict = model.state_dict()
            model_dict = set_init_dict(model_dict, checkpoint['model'], c)
            model.load_state_dict(model_dict)
            del model_dict

        if amp and 'amp' in checkpoint:
            amp.load_state_dict(checkpoint['amp'])

        for group in optimizer.param_groups:
            group['initial_lr'] = c.lr
        print(" > Model restored from step %d" % checkpoint['step'],
              flush=True)
        args.restore_step = checkpoint['step']
    else:
        args.restore_step = 0

    if use_cuda:
        model.cuda()
        criterion.cuda()

    # DISTRIBUTED
    if num_gpus > 1:
        model = DDP(model)

    if c.noam_schedule:
        scheduler = NoamLR(optimizer,
                           warmup_steps=c.warmup_steps,
                           last_epoch=args.restore_step - 1)
    else:
        scheduler = None

    num_params = count_parameters(model)
    print("\n > Model has {} parameters".format(num_params), flush=True)

    if 'best_loss' not in locals():
        best_loss = float('inf')

    global_step = args.restore_step
    model = data_depended_init(model, ap)
    for epoch in range(0, c.epochs):
        c_logger.print_epoch_start(epoch, c.epochs)
        train_avg_loss_dict, global_step = train(model, criterion, optimizer,
                                                 scheduler, ap, global_step,
                                                 epoch, amp)
        eval_avg_loss_dict = evaluate(model, criterion, ap, global_step, epoch)
        c_logger.print_epoch_end(epoch, eval_avg_loss_dict)
        target_loss = train_avg_loss_dict['avg_loss']
        if c.run_eval:
            target_loss = eval_avg_loss_dict['avg_loss']
        best_loss = save_best_model(target_loss, best_loss, model, optimizer, global_step, epoch, c.r,
                                    OUT_PATH, amp_state_dict=amp.state_dict() if amp else None)