Example #1
File: paths.py  Project: shawwn/WaveRNN
    def __init__(self, data_path, voc_id, tts_id):
        self.base = Path(__file__).parent.parent.expanduser().resolve()

        # Data Paths
        self.data = Path(data_path).expanduser().resolve()
        self.quant = self.data / 'quant'
        self.mel = self.data / 'mel'
        self.gta = self.data / 'gta'

        # WaveRNN/Vocoder Paths
        self.voc_checkpoints = self.base / 'checkpoints' / '%s.wavernn' % (
            repr1(voc_id))
        self.voc_latest_weights = self.voc_checkpoints / 'latest_weights.pyt'
        self.voc_latest_optim = self.voc_checkpoints / 'latest_optim.pyt'
        self.voc_output = self.base / 'model_outputs' / '%s.wavernn' % (
            repr1(voc_id))
        self.voc_step = self.voc_checkpoints / 'step.npy'
        self.voc_log = self.voc_checkpoints / 'log.txt'

        # Tacotron/TTS Paths
        self.tts_checkpoints = self.base / 'checkpoints' / '%s.tacotron' % (
            repr1(tts_id))
        self.tts_latest_weights = self.tts_checkpoints / 'latest_weights.pyt'
        self.tts_latest_optim = self.tts_checkpoints / 'latest_optim.pyt'
        self.tts_output = self.base / 'model_outputs' / '%s.tacotron' % (
            repr1(tts_id))
        self.tts_step = self.tts_checkpoints / 'step.npy'
        self.tts_log = self.tts_checkpoints / 'log.txt'
        self.tts_attention = self.tts_checkpoints / 'attention'
        self.tts_mel_plot = self.tts_checkpoints / 'mel_plots'

        self.create_paths()
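
Example #7 below constructs this class as Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id). A minimal stand-alone sketch, assuming the class lives at utils/paths.py as the file header suggests (the import path and the id strings are illustrative, not taken from the repo):

from utils.paths import Paths

paths = Paths('data/', 'ljspeech_voc', 'ljspeech_tts')   # data_path, voc_id, tts_id
print(paths.mel)        # <data_path>/mel
print(paths.voc_log)    # <base>/checkpoints/<voc_id>.wavernn/log.txt
# __init__ ends with self.create_paths(), which presumably creates this directory tree.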
Example #2
def create_gta_features(model: Tacotron, train_set, save_path: Path):
    device = next(
        model.parameters()).device  # use same device as model parameters

    iters = len(train_set)

    for i, (x, mels, ids, mel_lens) in enumerate(train_set, 1):

        x, mels = x.to(device), mels.to(device)

        with torch.no_grad():
            _, gta, _ = model(x, mels)

        gta = gta.cpu().numpy()

        for j, item_id in enumerate(ids):
            mel = gta[j][:, :mel_lens[j]]
            mel = (mel + 4) / 8
            np.save(save_path / '%s.npy' % (repr1(item_id)),
                    mel,
                    allow_pickle=False)

        bar = progbar(i, iters)
        msg = '%s %s/%s Batches ' % (repr1(bar), repr1(i), repr1(iters))
        stream(msg)
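
The line mel = (mel + 4) / 8 rescales the model's log-mel output, which the preprocessing presumably keeps within roughly [-4, 4], into the [0, 1] range that the range check in gen_from_file (Example #11) expects. A quick numeric check of that mapping:

import numpy as np

mel = np.array([-4.0, 0.0, 4.0])
print((mel + 4) / 8)    # maps to 0.0, 0.5, 1.0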
Example #3
def save_checkpoint(checkpoint_type: str,
                    paths: Paths,
                    model,
                    optimizer,
                    *,
                    name=None,
                    is_silent=False):
    """Saves the training session to disk.

    Args:
        paths:  Provides information about the different paths to use.
        model:  A `Tacotron` or `WaveRNN` model to save the parameters and buffers from.
        optimizer:  An optimizer to save the state of (momentum, etc.).
        name:  If provided, will save to a checkpoint with the given name. Note
            that regardless of whether this is provided or not, this function
            will always update the files specified in `paths` that give the
            location of the latest weights and optimizer state. Saving
            a named checkpoint happens in addition to this update.
    """
    def helper(path_dict, is_named):
        s = 'named' if is_named else 'latest'
        num_exist = sum(p.exists() for p in path_dict.values())

        if num_exist not in (0, 2):
            # Checkpoint broken
            raise FileNotFoundError(
                'We expected either both or no files in the %s checkpoint to exist, but instead we got exactly one!'
                % (repr1(s)))

        if num_exist == 0:
            if not is_silent: print('Creating %s checkpoint...' % (repr1(s)))
            for p in path_dict.values():
                p.parent.mkdir(parents=True, exist_ok=True)
        else:
            if not is_silent:
                print('Saving to existing %s checkpoint...' % (repr1(s)))

        if not is_silent:
            print('Saving %s weights: %s' % (repr1(s), repr1(path_dict["w"])))
        model.save(path_dict['w'])
        if not is_silent:
            print('Saving %s optimizer state: %s' %
                  (repr1(s), repr1(path_dict["o"])))
        torch.save(optimizer.state_dict(), path_dict['o'])

    weights_path, optim_path, checkpoint_path = \
        get_checkpoint_paths(checkpoint_type, paths)

    latest_paths = {'w': weights_path, 'o': optim_path}
    helper(latest_paths, False)

    if name:
        named_paths = {
            'w': checkpoint_path / '%s_weights.pyt' % (repr1(name)),
            'o': checkpoint_path / '%s_optim.pyt' % (repr1(name)),
        }
        helper(named_paths, True)
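
The training loops in Examples #8 and #14 call this at every checkpoint interval and again at the end of each epoch. A minimal call sketch, assuming a paths/model/optimizer triple as in those loops (the checkpoint name is illustrative):

# Update only the latest weights/optimizer files, without console output:
save_checkpoint('tts', paths, model, optimizer, is_silent=True)

# Additionally write a named checkpoint under the tts checkpoint directory:
save_checkpoint('tts', paths, model, optimizer, name='taco_step100K', is_silent=True)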
Example #4
def gen_testset(model: WaveRNN, test_set, samples, batched, target, overlap,
                save_path: Path):

    k = model.get_step() // 1000

    for i, (m, x) in enumerate(test_set, 1):

        if i > samples: break

        print('\n| Generating: %i/%i' % (i, samples))

        x = x[0].numpy()

        bits = 16 if hp.voc_mode == 'MOL' else hp.bits

        if hp.mu_law and hp.voc_mode != 'MOL':
            x = decode_mu_law(x, 2**bits, from_labels=True)
        else:
            x = label_2_float(x, bits)

        save_wav(x,
                 save_path / '%sk_steps_%s_target.wav' % (repr1(k), repr1(i)))

        batch_str = 'gen_batched_target%s_overlap%s' % (
            repr1(target), repr1(overlap)) if batched else 'gen_NOT_BATCHED'
        save_str = str(save_path / '%sk_steps_%s_%s.wav' %
                       (repr1(k), repr1(i), repr1(batch_str)))

        _ = model.generate(m, save_str, batched, target, overlap, hp.mu_law)
Example #5
    def helper(path_dict, is_named):
        s = 'named' if is_named else 'latest'
        num_exist = sum(p.exists() for p in path_dict.values())

        if num_exist not in (0, 2):
            # Checkpoint broken
            raise FileNotFoundError(
                'We expected either both or no files in the %s checkpoint to exist, but instead we got exactly one!'
                % (repr1(s)))

        if num_exist == 0:
            if not is_silent: print('Creating %s checkpoint...' % (repr1(s)))
            for p in path_dict.values():
                p.parent.mkdir(parents=True, exist_ok=True)
        else:
            if not is_silent:
                print('Saving to existing %s checkpoint...' % (repr1(s)))

        if not is_silent:
            print('Saving %s weights: %s' % (repr1(s), repr1(path_dict["w"])))
        model.save(path_dict['w'])
        if not is_silent:
            print('Saving %s optimizer state: %s' %
                  (repr1(s), repr1(path_dict["o"])))
        torch.save(optimizer.state_dict(), path_dict['o'])
Example #6
def restore_checkpoint(checkpoint_type: str,
                       paths: Paths,
                       model,
                       optimizer,
                       *,
                       name=None,
                       create_if_missing=False):
    """Restores from a training session saved to disk.

    NOTE: The optimizer's state is placed on the same device as its model
    parameters. Therefore, be sure you have done `model.to(device)` before
    calling this method.

    Args:
        paths:  Provides information about the different paths to use.
        model:  A `Tacotron` or `WaveRNN` model to load the parameters and buffers into.
        optimizer:  An optimizer to restore the state of (momentum, etc.).
        name:  If provided, will restore from a checkpoint with the given name.
            Otherwise, will restore from the latest weights and optimizer state
            as specified in `paths`.
        create_if_missing:  If `True`, will create the checkpoint if it doesn't
            yet exist, as well as update the files specified in `paths` that
            give the location of the current latest weights and optimizer state.
            If `False` and the checkpoint doesn't exist, will raise a
            `FileNotFoundError`.
    """

    weights_path, optim_path, checkpoint_path = \
        get_checkpoint_paths(checkpoint_type, paths)

    if name:
        path_dict = {
            'w': checkpoint_path / '%s_weights.pyt' % (repr1(name)),
            'o': checkpoint_path / '%s_optim.pyt' % (repr1(name)),
        }
        s = 'named'
    else:
        path_dict = {'w': weights_path, 'o': optim_path}
        s = 'latest'

    num_exist = sum(p.exists() for p in path_dict.values())
    if num_exist == 2:
        # Checkpoint exists
        print('Restoring from %s checkpoint...' % (repr1(s)))
        print('Loading %s weights: %s' % (repr1(s), repr1(path_dict["w"])))
        model.load(path_dict['w'])
        print('Loading %s optimizer state: %s' %
              (repr1(s), repr1(path_dict["o"])))
        optimizer.load_state_dict(torch.load(path_dict['o']))
    elif create_if_missing:
        save_checkpoint(checkpoint_type,
                        paths,
                        model,
                        optimizer,
                        name=name,
                        is_silent=False)
    else:
        raise FileNotFoundError('The %s checkpoint could not be found!' %
                                (repr1(s)))
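
Example #7 calls this with create_if_missing=True right after building the model and optimizer; restoring a named checkpoint only differs in the name argument. A minimal sketch under the same assumptions (model, paths and device already exist; the checkpoint name is illustrative):

from torch import optim

model = model.to(device)   # move the model first; the optimizer state follows its device
optimizer = optim.Adam(model.parameters())

restore_checkpoint('tts', paths, model, optimizer, create_if_missing=True)
# or restore a specific named checkpoint instead of the latest one:
restore_checkpoint('tts', paths, model, optimizer, name='taco_step100K')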
Example #7
def main():
    # Parse Arguments
    parser = argparse.ArgumentParser(description='Train Tacotron TTS')
    parser.add_argument('--force_train',
                        '-f',
                        action='store_true',
                        help='Forces the model to train past total steps')
    parser.add_argument('--force_gta',
                        '-g',
                        action='store_true',
                        help='Force the model to create GTA features')
    parser.add_argument(
        '--force_cpu',
        '-c',
        action='store_true',
        help='Forces CPU-only training, even when in CUDA capable environment')
    parser.add_argument('--hp_file',
                        metavar='FILE',
                        default='hparams.py',
                        help='The file to use for the hyperparameters')
    args = parser.parse_args()

    hp.configure(args.hp_file)  # Load hparams from file
    paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id)

    force_train = args.force_train
    force_gta = args.force_gta

    if not args.force_cpu and torch.cuda.is_available():
        device = torch.device('cuda')
        for session in hp.tts_schedule:
            _, _, _, batch_size = session
            if batch_size % torch.cuda.device_count() != 0:
                raise ValueError(
                    '`batch_size` must be evenly divisible by n_gpus!')
    else:
        device = torch.device('cpu')
    print('Using device:', device)

    # Instantiate Tacotron Model
    print('\nInitialising Tacotron Model...\n')
    model = Tacotron(embed_dims=hp.tts_embed_dims,
                     num_chars=len(symbols),
                     encoder_dims=hp.tts_encoder_dims,
                     decoder_dims=hp.tts_decoder_dims,
                     n_mels=hp.num_mels,
                     fft_bins=hp.num_mels,
                     postnet_dims=hp.tts_postnet_dims,
                     encoder_K=hp.tts_encoder_K,
                     lstm_dims=hp.tts_lstm_dims,
                     postnet_K=hp.tts_postnet_K,
                     num_highways=hp.tts_num_highways,
                     dropout=hp.tts_dropout,
                     stop_threshold=hp.tts_stop_threshold).to(device)

    optimizer = optim.Adam(model.parameters())
    restore_checkpoint('tts', paths, model, optimizer, create_if_missing=True)

    if not force_gta:
        for i, session in enumerate(hp.tts_schedule):
            current_step = model.get_step()

            r, lr, max_step, batch_size = session

            training_steps = max_step - current_step

            # Do we need to change to the next session?
            if current_step >= max_step:
                # Are there no further sessions than the current one?
                if i == len(hp.tts_schedule) - 1:
                    # There are no more sessions. Check if we force training.
                    if force_train:
                        # Don't finish the loop - train forever
                        training_steps = 999_999_999
                    else:
                        # We have completed training; breaking here is the same as continuing.
                        break
                else:
                    # There is a following session, go to it
                    continue

            model.r = r

            simple_table([('Steps with r=%s' % (repr1(r)),
                           str(training_steps // 1000) + 'k Steps'),
                          ('Batch Size', batch_size), ('Learning Rate', lr),
                          ('Outputs/Step (r)', model.r)])

            train_set, attn_example = get_tts_datasets(paths.data, batch_size,
                                                       r)
            tts_train_loop(paths, model, optimizer, train_set, lr,
                           training_steps, attn_example)

        print('Training Complete.')
        print(
            'To continue training increase tts_total_steps in hparams.py or use --force_train\n'
        )

    print('Creating Ground Truth Aligned Dataset...\n')

    train_set, attn_example = get_tts_datasets(paths.data, 8, model.r)
    create_gta_features(model, train_set, paths.gta)

    print(
        '\n\nYou can now train WaveRNN on GTA features - use python train_wavernn.py --gta\n'
    )
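
Each hp.tts_schedule entry is unpacked above as (r, lr, max_step, batch_size), and when training on CUDA every batch_size must also divide evenly by the GPU count. An illustrative schedule, with made-up values rather than the repo defaults:

# (outputs per step r, learning rate, train until this global step, batch size)
tts_schedule = [(7, 1e-3,  10_000, 32),
                (5, 1e-4, 100_000, 16),
                (2, 1e-4, 200_000, 8)]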
Example #8
def tts_train_loop(paths: Paths, model: Tacotron, optimizer, train_set, lr,
                   train_steps, attn_example):
    device = next(
        model.parameters()).device  # use same device as model parameters

    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = train_steps // total_iters + 1

    for e in range(1, epochs + 1):

        start = time.time()
        running_loss = 0

        # Perform 1 epoch
        for i, (x, m, ids, _) in enumerate(train_set, 1):

            x, m = x.to(device), m.to(device)

            # Parallelize model onto GPUS using workaround due to python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                m1_hat, m2_hat, attention = data_parallel_workaround(
                    model, x, m)
            else:
                m1_hat, m2_hat, attention = model(x, m)

            m1_loss = F.l1_loss(m1_hat, m)
            m2_loss = F.l1_loss(m2_hat, m)

            loss = m1_loss + m2_loss

            optimizer.zero_grad()
            loss.backward()
            if hp.tts_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.tts_clip_grad_norm)
                if np.isnan(grad_norm):
                    print('grad_norm was NaN!')

            optimizer.step()

            running_loss += loss.item()
            avg_loss = running_loss / i

            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            if step % hp.tts_checkpoint_every == 0:
                ckpt_name = 'taco_step%sK' % (repr1(k))
                save_checkpoint('tts',
                                paths,
                                model,
                                optimizer,
                                name=ckpt_name,
                                is_silent=True)

            if attn_example in ids:
                idx = ids.index(attn_example)
                save_attention(np_now(attention[idx][:, :160]),
                               paths.tts_attention / '%s' % (repr1(step)))
                save_spectrogram(np_now(m2_hat[idx]),
                                 paths.tts_mel_plot / '%s' % (repr1(step)),
                                 600)

            msg = '| Epoch: %s/%s (%s/%s) | Loss: %.4f | %.2f steps/s | Step: %sk | ' % (
                repr1(e), repr1(epochs), repr1(i), repr1(total_iters),
                avg_loss, speed, repr1(k))
            stream(msg)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('tts', paths, model, optimizer, is_silent=True)
        model.log(paths.tts_log, msg)
        print(' ')
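
The epoch count above, epochs = train_steps // total_iters + 1, slightly overshoots the requested number of steps rather than stopping short. With illustrative numbers:

total_iters = 500                          # batches per epoch
train_steps = 10_000                       # steps left in the current session
epochs = train_steps // total_iters + 1    # 21 epochs, i.e. about 10,500 steps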
Example #9
    def __getitem__(self, index):
        item_id = self.metadata[index]
        m = np.load(self.mel_path / '%s.npy' % (repr1(item_id)))
        x = np.load(self.quant_path / '%s.npy' % (repr1(item_id)))
        return m, x
Example #10
    def __getitem__(self, index):
        item_id = self.metadata[index]
        x = text_to_sequence(self.text_dict[item_id], hp.tts_cleaner_names)
        mel = np.load(self.path / 'mel' / '%s.npy' % (repr1(item_id)))
        mel_len = mel.shape[-1]
        return x, mel, item_id, mel_len
Example #11
def gen_from_file(model: WaveRNN, load_path: Path, save_path: Path, batched,
                  target, overlap):

    k = model.get_step() // 1000
    file_name = load_path.stem

    suffix = load_path.suffix
    if suffix == ".wav":
        wav = load_wav(load_path)
        save_wav(
            wav, save_path / '__%s__%sk_steps_target.wav' %
            (repr1(file_name), repr1(k)))
        mel = melspectrogram(wav)
    elif suffix == ".npy":
        mel = np.load(load_path)
        if mel.ndim != 2 or mel.shape[0] != hp.num_mels:
            raise ValueError(
                'Expected a numpy array shaped (n_mels, n_hops), but got %s!' %
                (repr1(mel.shape)))
        _max = np.max(mel)
        _min = np.min(mel)
        if _max >= 1.01 or _min <= -0.01:
            raise ValueError(
                'Expected spectrogram range in [0,1] but was instead [%s, %s]'
                % (repr1(_min), repr1(_max)))
    else:
        raise ValueError('Expected an extension of .wav or .npy, but got %s!' %
                         (repr1(suffix)))

    mel = torch.tensor(mel).unsqueeze(0)

    batch_str = 'gen_batched_target%s_overlap%s' % (
        repr1(target), repr1(overlap)) if batched else 'gen_NOT_BATCHED'
    save_str = save_path / '__%s__%sk_steps_%s.wav' % (
        repr1(file_name), repr1(k), repr1(batch_str))

    _ = model.generate(mel, save_str, batched, target, overlap, hp.mu_law)
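
A minimal calling sketch that reuses the vocoder generation hyperparameters seen in Example #14; voc_model is assumed to be a restored WaveRNN and the input paths are illustrative:

from pathlib import Path

# From a reference wav: a *_target.wav copy is saved, then its mel is re-synthesised.
gen_from_file(voc_model, Path('samples/ref.wav'), paths.voc_output,
              hp.voc_gen_batched, hp.voc_target, hp.voc_overlap)

# From a saved (n_mels, n_hops) spectrogram whose values lie in [0, 1]:
gen_from_file(voc_model, Path('outputs/sentence_001.npy'), paths.voc_output,
              hp.voc_gen_batched, hp.voc_target, hp.voc_overlap)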
Example #12
File: paths.py  Project: shawwn/WaveRNN
    def get_voc_named_optim(self, name):
        """Gets the path for the optimizer state in a named voc checkpoint."""
        return self.voc_checkpoints / '%s_optim.pyt' % (repr1(name))
Example #13
File: paths.py  Project: shawwn/WaveRNN
    def get_voc_named_weights(self, name):
        """Gets the path for the weights in a named voc checkpoint."""
        return self.voc_checkpoints / '%s_weights.pyt' % (repr1(name))
Example #14
def voc_train_loop(paths: Paths, model: WaveRNN, loss_func, optimizer,
                   train_set, test_set, lr, total_steps):
    # Use same device as model parameters
    device = next(model.parameters()).device

    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = (total_steps - model.get_step()) // total_iters + 1

    for e in range(1, epochs + 1):

        start = time.time()
        running_loss = 0.

        for i, (x, y, m) in enumerate(train_set, 1):
            x, m, y = x.to(device), m.to(device), y.to(device)

            # Parallelize model onto GPUS using workaround due to python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                y_hat = data_parallel_workaround(model, x, m)
            else:
                y_hat = model(x, m)

            if model.mode == 'RAW':
                y_hat = y_hat.transpose(1, 2).unsqueeze(-1)

            elif model.mode == 'MOL':
                y = y.float()

            y = y.unsqueeze(-1)

            loss = loss_func(y_hat, y)

            optimizer.zero_grad()
            loss.backward()
            if hp.voc_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.voc_clip_grad_norm)
                if np.isnan(grad_norm):
                    print('grad_norm was NaN!')
            optimizer.step()

            running_loss += loss.item()
            avg_loss = running_loss / i

            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            if step % hp.voc_checkpoint_every == 0:
                gen_testset(model, test_set, hp.voc_gen_at_checkpoint,
                            hp.voc_gen_batched, hp.voc_target, hp.voc_overlap,
                            paths.voc_output)
                ckpt_name = 'wave_step%sK' % (repr1(k))
                save_checkpoint('voc',
                                paths,
                                model,
                                optimizer,
                                name=ckpt_name,
                                is_silent=True)

            msg = '| Epoch: %s/%s (%s/%s) | Loss: %.4f | %.1f steps/s | Step: %sk | ' % (
                repr1(e), repr1(epochs), repr1(i), repr1(total_iters),
                avg_loss, speed, repr1(k))
            stream(msg)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('voc', paths, model, optimizer, is_silent=True)
        model.log(paths.voc_log, msg)
        print(' ')
Example #15
def get_files(path: Union[str, Path], extension='.wav'):
    if isinstance(path, str): path = Path(path).expanduser().resolve()
    return list(path.rglob('*%s' % (repr1(extension))))
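
Because the search uses rglob, files in nested sub-directories are picked up as well. A small sketch (the dataset path is illustrative):

wav_files = get_files('~/datasets/LJSpeech-1.1/wavs')    # default extension '.wav'
mel_files = get_files(paths.mel, extension='.npy')       # any other extension works too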
Example #16
    if input_text:
        inputs = [text_to_sequence(input_text.strip(), hp.tts_cleaner_names)]
    else:
        with open('sentences.txt') as f:
            inputs = [
                text_to_sequence(l.strip(), hp.tts_cleaner_names) for l in f
            ]

    voc_k = voc_model.get_step() // 1000
    tts_k = tts_model.get_step() // 1000

    r = tts_model.r

    simple_table([('WaveRNN', str(voc_k) + 'k'),
                  ('Tacotron(r=%s)' % (repr1(r)), str(tts_k) + 'k'),
                  ('Generation Mode', 'Batched' if batched else 'Unbatched'),
                  ('Target Samples', target if batched else 'N/A'),
                  ('Overlap Samples', overlap if batched else 'N/A')])

    for i, x in enumerate(inputs, 1):

        print('\n| Generating %s/%s' % (repr1(i), repr1(len(inputs))))
        _, m, attention = tts_model.generate(x)

        if input_text:
            save_path = 'quick_start/__input_%s_%sk.wav' % (repr1(
                input_text[:10]), repr1(tts_k))
        else:
            save_path = 'quick_start/%s_batched%s_%sk.wav' % (
                repr1(i), repr1(str(batched)), repr1(tts_k))