def get_vocoder(device="cuda"):
    """Load and return the pretrained neural vocoder selected by the global ``VOCODER``.

    Supported ``VOCODER`` values: ``'MelGAN'`` (universal multi-speaker
    checkpoint) and ``'WaveGlow'``; any other value falls through to
    HiFi-GAN with the universal checkpoint.

    Args:
        device: Device the vocoder is moved to. Defaults to ``"cuda"``,
            matching the original hard-coded ``.cuda()`` behavior, but now
            allows CPU or a specific GPU index.

    Returns:
        The vocoder model in eval mode on ``device``.
    """
    if VOCODER == 'MelGAN':
        vocoder = torch.hub.load("descriptinc/melgan-neurips", "load_melgan", "multi_speaker")
        # MelGAN's actual generator lives under .mel2wav.
        vocoder.mel2wav.eval()
        vocoder.mel2wav.to(device)
    elif VOCODER == "WaveGlow":
        vocoder = torch.hub.load('nvidia/DeepLearningExamples:torchhub', 'nvidia_waveglow')
        vocoder = vocoder.remove_weightnorm(vocoder)
        vocoder.eval()
        # Older WaveGlow checkpoints serialized Conv layers without a
        # `padding_mode` attribute; patch it so forward() works on newer
        # PyTorch versions.
        for m in vocoder.modules():
            if 'Conv' in str(type(m)):
                setattr(m, 'padding_mode', 'zeros')
        vocoder.to(device)
    else:
        # Default: HiFi-GAN with the universal checkpoint.
        with open("hifigan/config.json", "r") as f:
            hifigan_config = hifigan.AttrDict(json.load(f))
        vocoder = hifigan.Generator(hifigan_config)
        ckpt = torch.load("hifigan/generator_universal.pth.tar")
        vocoder.load_state_dict(ckpt["generator"])
        vocoder.eval()
        # Weight norm is a training-time reparameterization; remove for inference.
        vocoder.remove_weight_norm()
        vocoder.to(device)
    return vocoder
def get_vocoder(config, device):
    """Load the pretrained vocoder described by ``config`` onto ``device``.

    Args:
        config: Mapping with ``config["vocoder"]["model"]`` in
            {"MelGAN", "HiFi-GAN"} and ``config["vocoder"]["speaker"]`` in
            {"LJSpeech", "universal"}.
        device: Target device for the model.

    Returns:
        The vocoder model in eval mode on ``device``.

    Raises:
        ValueError: If the vocoder model or speaker is not recognized
            (previously this surfaced as an obscure NameError/UnboundLocalError).
    """
    name = config["vocoder"]["model"]
    speaker = config["vocoder"]["speaker"]

    if name == "MelGAN":
        if speaker == "LJSpeech":
            vocoder = torch.hub.load(
                "descriptinc/melgan-neurips", "load_melgan", "linda_johnson"
            )
        elif speaker == "universal":
            vocoder = torch.hub.load(
                "descriptinc/melgan-neurips", "load_melgan", "multi_speaker"
            )
        else:
            raise ValueError(f"Unsupported MelGAN speaker: {speaker!r}")
        # MelGAN's actual generator lives under .mel2wav.
        vocoder.mel2wav.eval()
        vocoder.mel2wav.to(device)
    elif name == "HiFi-GAN":
        # Use a distinct local name: the original rebound `config` here,
        # shadowing the function parameter.
        with open("hifigan/config.json", "r") as f:
            hifigan_config = hifigan.AttrDict(json.load(f))
        vocoder = hifigan.Generator(hifigan_config)
        if speaker == "LJSpeech":
            ckpt = torch.load("hifigan/generator_LJSpeech.pth.tar")
        elif speaker == "universal":
            ckpt = torch.load("hifigan/generator_universal.pth.tar")
        else:
            raise ValueError(f"Unsupported HiFi-GAN speaker: {speaker!r}")
        vocoder.load_state_dict(ckpt["generator"])
        vocoder.eval()
        # Weight norm is a training-time reparameterization; remove for inference.
        vocoder.remove_weight_norm()
        vocoder.to(device)
    else:
        raise ValueError(f"Unsupported vocoder model: {name!r}")

    return vocoder
def get_vocoder(config, device):
    """Load the pretrained vocoder described by ``config`` onto ``device``.

    Args:
        config: Mapping with ``config["vocoder"]["model"]`` in
            {"MelGAN", "HiFi-GAN"} and ``config["vocoder"]["speaker"]``
            ("LJSpeech" or "universal"; only used by the MelGAN branch —
            the HiFi-GAN branch always loads the UNIVERSAL_V1 checkpoint).
        device: Target device; also used as ``map_location`` when loading
            the HiFi-GAN checkpoint so CPU-only machines work.

    Returns:
        The vocoder model in eval mode on ``device``.
    """
    name = config["vocoder"]["model"]
    speaker = config["vocoder"]["speaker"]

    if name == "MelGAN":
        if speaker == "LJSpeech":
            vocoder = torch.hub.load("descriptinc/melgan-neurips", "load_melgan", "linda_johnson")
        elif speaker == "universal":
            vocoder = torch.hub.load("descriptinc/melgan-neurips", "load_melgan", "multi_speaker")
        # MelGAN's actual generator lives under .mel2wav.
        vocoder.mel2wav.eval()
        vocoder.mel2wav.to(device)
    elif name == "HiFi-GAN":
        # NOTE(review): hard-coded Colab/Google-Drive checkpoint location;
        # hoisted into one local so it only needs changing in a single place.
        ckpt_dir = "/content/drive/MyDrive/checkpoint/hifigan/UNIVERSAL_V1"
        # Use a distinct local name: the original rebound `config` here,
        # shadowing the function parameter.
        with open(os.path.join(ckpt_dir, "config.json"), "r") as f:
            hifigan_config = hifigan.AttrDict(json.load(f))
        vocoder = hifigan.Generator(hifigan_config)
        ckpt = torch.load(os.path.join(ckpt_dir, "g_03175000"), map_location=device)
        vocoder.load_state_dict(ckpt["generator"])
        vocoder.eval()
        # Weight norm is a training-time reparameterization; remove for inference.
        vocoder.remove_weight_norm()
        vocoder.to(device)

    return vocoder
def get_vocoder(name, device):
    """Instantiate the vocoder identified by ``name`` on ``device``, in eval mode.

    Recognized names: "MelGAN", "MelGAN-LJ", "HiFi-GAN", "HiFi-GAN-LJ";
    any other value falls through to WaveGlow.
    """

    def _melgan(tag):
        # MelGAN comes from torch.hub; the generator lives under .mel2wav.
        model = torch.hub.load("descriptinc/melgan-neurips", "load_melgan", tag)
        model.mel2wav.to(device)
        model.mel2wav.eval()
        return model

    def _hifigan(ckpt_path):
        with open("hifigan/config.json", "r") as f:
            cfg = hifigan.AttrDict(json.load(f))
        model = hifigan.Generator(cfg)
        model.load_state_dict(torch.load(ckpt_path)["generator"])
        # Weight norm is a training-time reparameterization; drop it for inference.
        model.remove_weight_norm()
        model.to(device)
        model.eval()
        return model

    if name == "MelGAN":
        return _melgan("multi_speaker")
    if name == "MelGAN-LJ":
        return _melgan("linda_johnson")
    if name == "HiFi-GAN":
        return _hifigan("hifigan/generator_universal.pth.tar")
    if name == "HiFi-GAN-LJ":
        return _hifigan("hifigan/generator_LJSpeech.pth.tar")

    # Anything else: WaveGlow from NVIDIA's torch.hub examples.
    model = torch.hub.load('nvidia/DeepLearningExamples:torchhub', 'nvidia_waveglow')
    model = model.remove_weightnorm(model)
    model = model.to(device)
    model.eval()
    return model
def get_vocoder():
    """Load the vocoder chosen by the hyperparameters (``hp.vocoder`` /
    ``hp.vocoder_speaker``) and return it in eval mode.

    NOTE(review): relies on a module-level ``device`` global — confirm it is
    defined where this function is used.
    """
    name = hp.vocoder
    speaker = hp.vocoder_speaker

    if name == "MelGAN":
        hub_repo, hub_entry = "descriptinc/melgan-neurips", "load_melgan"
        if speaker == "LJSpeech":
            vocoder = torch.hub.load(hub_repo, hub_entry, "linda_johnson")
        elif speaker == "universal":
            vocoder = torch.hub.load(hub_repo, hub_entry, "multi_speaker")
        # MelGAN's actual generator lives under .mel2wav.
        vocoder.mel2wav.eval()
        vocoder.mel2wav.to(device)
    elif name == "HiFi-GAN":
        with open("hifigan/config.json", "r") as f:
            cfg = hifigan.AttrDict(json.load(f))
        vocoder = hifigan.Generator(cfg)
        if speaker == "LJSpeech":
            ckpt = torch.load("hifigan/generator_LJSpeech.pth.tar")
        elif speaker == "universal":
            ckpt = torch.load("hifigan/generator_universal.pth.tar")
        vocoder.load_state_dict(ckpt["generator"])
        vocoder.eval()
        # Weight norm is a training-time reparameterization; drop it for inference.
        vocoder.remove_weight_norm()
        vocoder.to(device)
    elif name == "WaveGlow":
        vocoder = torch.hub.load('nvidia/DeepLearningExamples:torchhub', 'nvidia_waveglow')
        vocoder = vocoder.remove_weightnorm(vocoder)
        vocoder.eval()
        # Older checkpoints lack `padding_mode` on Conv layers; patch it so
        # forward() works on newer PyTorch versions.
        for module in vocoder.modules():
            if 'Conv' in str(type(module)):
                module.padding_mode = 'zeros'
        vocoder.to(device)

    return vocoder