def main(args):  # pylint: disable=redefined-outer-name
    # pylint: disable=global-variable-undefined
    global meta_data_train
    global meta_data_eval

    # audio processor and speaker encoder model built from the config
    ap = AudioProcessor(**c.audio)
    model = SpeakerEncoder(
        input_dim=c.model["input_dim"],
        proj_dim=c.model["proj_dim"],
        lstm_dim=c.model["lstm_dim"],
        num_lstm_layers=c.model["num_lstm_layers"],
    )
    optimizer = RAdam(model.parameters(), lr=c.lr)

    # pick the training loss defined in the config
    if c.loss == "ge2e":
        criterion = GE2ELoss(loss_method="softmax")
    elif c.loss == "angleproto":
        criterion = AngleProtoLoss()
    else:
        raise Exception("The %s is not a supported loss" % c.loss)

    # optionally restore model weights from a previous checkpoint
    if args.restore_path:
        checkpoint = torch.load(args.restore_path)
        try:
            # TODO: fix optimizer init, model.cuda() needs to be called before
            # optimizer restore
            # optimizer.load_state_dict(checkpoint['optimizer'])
            if c.reinit_layers:
                raise RuntimeError
            model.load_state_dict(checkpoint["model"])
        except KeyError:
            print(" > Partial model initialization.")
            model_dict = model.state_dict()
            model_dict = set_init_dict(model_dict, checkpoint, c)
            model.load_state_dict(model_dict)
            del model_dict
        for group in optimizer.param_groups:
            group["lr"] = c.lr
        print(" > Model restored from step %d" % checkpoint["step"], flush=True)
        args.restore_step = checkpoint["step"]
    else:
        args.restore_step = 0

    if use_cuda:
        model = model.cuda()
        criterion.cuda()

    if c.lr_decay:
        scheduler = NoamLR(optimizer, warmup_steps=c.warmup_steps, last_epoch=args.restore_step - 1)
    else:
        scheduler = None

    num_params = count_parameters(model)
    print("\n > Model has {} parameters".format(num_params), flush=True)

    # pylint: disable=redefined-outer-name
    meta_data_train, meta_data_eval = load_meta_data(c.datasets)

    global_step = args.restore_step
    _, global_step = train(model, criterion, optimizer, scheduler, ap, global_step)
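
# The entry point below is only a sketch of how main() is usually wired up; the
# exact CLI flags (--config_path, --restore_path), the load_config() helper, and
# the torch.cuda.is_available() check are assumptions, not taken from this file.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Train the speaker encoder.")
    parser.add_argument("--config_path", type=str, required=True,
                        help="Path to the encoder config file (assumed flag name).")
    parser.add_argument("--restore_path", type=str, default="",
                        help="Checkpoint to continue training from (assumed flag name).")
    args = parser.parse_args()

    c = load_config(args.config_path)  # assumed config loader
    use_cuda = torch.cuda.is_available()
    main(args)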
        #print(f'wav_file: {wav_file}')
        if os.path.exists(wav_file):
            wav_files.append(wav_file)
    print(f'Count of wavs imported: {len(wav_files)}')
else:
    # Parse all wav files in data_path
    wav_path = data_path
    wav_files = glob.glob(data_path + '/**/*.wav', recursive=True)

output_files = [
    wav_file.replace(wav_path, args.output_path).replace('.wav', '.npy')
    for wav_file in wav_files
]

for output_file in output_files:
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

model = SpeakerEncoder(**c.model)
model.load_state_dict(torch.load(args.model_path)['model'])
model.eval()
if args.use_cuda:
    model.cuda()

for idx, wav_file in enumerate(tqdm(wav_files)):
    mel_spec = ap.melspectrogram(ap.load_wav(wav_file)).T
    mel_spec = torch.FloatTensor(mel_spec[None, :, :])
    if args.use_cuda:
        mel_spec = mel_spec.cuda()
    embedd = model.compute_embedding(mel_spec)
    np.save(output_files[idx], embedd.detach().cpu().numpy())
        # print(f'wav_file: {wav_file}')
        if os.path.exists(wav_file):
            wav_files.append(wav_file)
    print(f"Count of wavs imported: {len(wav_files)}")
else:
    # Parse all wav files in data_path
    wav_files = glob.glob(data_path + "/**/*.wav", recursive=True)

output_files = [wav_file.replace(data_path, args.output_path).replace(".wav", ".npy") for wav_file in wav_files]

for output_file in output_files:
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

# define Encoder model
model = SpeakerEncoder(**c.model)
model.load_state_dict(torch.load(args.model_path)["model"])
model.eval()
if args.use_cuda:
    model.cuda()

# compute speaker embeddings
speaker_mapping = {}
for idx, wav_file in enumerate(tqdm(wav_files)):
    if isinstance(wav_file, list):
        speaker_name = wav_file[2]
        wav_file = wav_file[1]

    mel_spec = ap.melspectrogram(ap.load_wav(wav_file, sr=ap.sample_rate)).T
    mel_spec = torch.FloatTensor(mel_spec[None, :, :])
    if args.use_cuda:
        mel_spec = mel_spec.cuda()
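
    # The rest of the loop is a sketch of the usual completion (assumed, not
    # verbatim from this script): compute the utterance embedding, save it to the
    # matching .npy path, and also record it in speaker_mapping.
    embedd = model.compute_embedding(mel_spec).detach().cpu().numpy()
    np.save(output_files[idx], embedd)

    # speaker_name is only set above for list-style metadata entries, so fall back
    # to None for plain wav paths (this runs at module level, hence globals()).
    speaker_mapping[os.path.basename(wav_file)] = {
        "name": globals().get("speaker_name"),
        "embedding": embedd.flatten().tolist(),
    }

# Assumed final step: persist the mapping as JSON so a multi-speaker model can
# look embeddings up by file name; "speaker_mapping.json" is a guessed file name.
import json  # assumed to live with the other imports at the top of the script

with open(os.path.join(args.output_path, "speaker_mapping.json"), "w", encoding="utf-8") as f:
    json.dump(speaker_mapping, f, indent=2)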