示例#1
0
import utility
from autovc.model_vc import Generator
from config import Config
from data_converter import Converter
from parallel_wavegan.utils import read_hdf5

from autovc.synthesis import build_model_melgan, melgan


# Run on the GPU when CUDA is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    # Report the name of CUDA device 0.
    print(torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")

# Module-level Converter constructed with the selected device.
converter = Converter(device)

import yaml

from parallel_wavegan.utils import download_pretrained_model, load_model


def logmelfilterbank(audio,
                     sampling_rate,
                     fft_size=1024,
                     hop_size=256,
                     win_length=None,
                     window="hann",
                     num_mels=80,
                     fmin=None,
                     fmax=None,
示例#2
0
        f"    |-embedding.npy         (single (speaker) embedding per speaker)\n"
        f"  |-...\n")

    args = parser.parse_args()

    cur_time = datetime.datetime.now().strftime("%Y%m%d_%H;%M")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    #==============================Dir settings==========================
    input_dir = args.input_dir
    output_dir = args.output_dir

    #===========================Converter (+output dir addition)================================
    if args.spectrogram_type == "standard":
        log.info("Using the default AutoVC spectrogram creator")
        converter = Converter(device)
    elif args.spectrogram_type == "melgan":
        log.info("Using a melgan spectrogram-converter for dataset generator")
        converter = MelganConverter(device,
                                    Config.dir_paths["melgan_config_path"],
                                    Config.dir_paths["melgan_stats_path"])

    #===============================create metadata (if it does not exist already)====================
    if not os.path.exists(os.path.join(
            output_dir,
            Config.train_metadata_name)):  #if metadata doesnt already exist
        _ = converter.generate_train_data(input_dir, output_dir,
                                          Config.train_metadata_name)
    else:
        log.warning(
            f" ATTENTION: metadata already exists at: {os.path.join(output_dir, Config.train_metadata_name)}, now exiting..."
示例#3
0
from librosa.filters import mel as librosa_mel_fn
from numpy.random import RandomState
from sklearn.preprocessing import StandardScaler

import utility
from autovc.model_vc import Generator
from config import Config
from data_converter import Converter
from parallel_wavegan.utils import read_hdf5

# Select the compute device: CUDA if torch reports it available, else CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    # Print the name of CUDA device 0 so the active GPU is visible in the output.
    print(torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")

# Shared Converter instance, constructed with the chosen device.
converter = Converter(device)

import yaml

from parallel_wavegan.utils import download_pretrained_model, load_model


def logmelfilterbank(
    audio,
    sampling_rate,
    fft_size=1024,  #TODO: scale this based on sampling rate as well? Otherwise each fft-frame (and spectrogram entry) is less time (e.g. at 22khz, 1024 samples is 0.05sec, while at 44khz its 0.1 sec)
    hop_size=256,
    win_length=None,
    window="hann",
    num_mels=80,
    fmin=None,
示例#4
0
# Speaker / utterance selections. The live code below does not read these;
# they are arguments of the disabled `wav_to_input` call noted at the end.
source_list = ["p225_001"]
target_speaker = "Wouter"
target_list = ["1", "2", "3", "4", "5", "6", "7"]

# Directories and metadata file name, all read from the project Config.
input_dir = Config.dir_paths["input"]
converted_data_dir = Config.dir_paths["metadata"]
output_file_dir = Config.dir_paths["output"]
metadata_name = Config.metadata_name

# Use CUDA when torch reports it available (and print the name of device 0),
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")

converter = Converter(device)

# Build spectrograms from the wavs in `input_dir`, passing `spec_dir` along.
# NOTE(review): `_wav_to_spec` is a private Converter method; its return value
# is indexed below as specs[speaker][utterance] — confirm against Converter.
spec_dir = Config.dir_paths["spectrograms"]
specs = converter._wav_to_spec(input_dir, spec_dir)

# Single (output name, spectrogram) pair to convert back to audio.
# Utterance "6" = "This is a test sentence".
spect_convert_list = [
    ("Wouter_test_wav_to_spect_to_wav", specs["Wouter"]["6"]),
]

converter.output_to_wav(spect_convert_list)

print("Done")

# Disabled alternative pipeline (kept for reference):
# input_data = converter.wav_to_input(input_dir, source_speaker, target_speaker, source_list, target_list, converted_data_dir, metadata_name)
# output_data = inference(output_file_dir, device, input_data=input_data)
示例#5
0
        output_file_dir = os.path.join(output_file_dir, "griffin")
    elif args.vocoder == "wavenet":
        from vocoders import WaveNet
        vocoder_path = os.path.join(Config.dir_paths["networks"],
                                    Config.pretrained_names["wavenet"])
        vocoder = WaveNet(device, vocoder_path)
        output_file_dir = os.path.join(output_file_dir, "wavenet")
    elif args.vocoder == "melgan":
        from vocoders import MelGan
        spectrogram_type = "melgan"
        vocoder = MelGan(device)
        output_file_dir = os.path.join(output_file_dir, "melgan")

    sr = 16000
    if spectrogram_type == "standard":
        converter = Converter(device)
    elif spectrogram_type == "melgan":
        sr = 24000
        converter = MelganConverter(device,
                                    Config.dir_paths["melgan_config_path"],
                                    Config.dir_paths["melgan_stats_path"])

    skip = not args.force_preprocess
    input_data = converter.wav_to_convert_input(input_dir,
                                                source_speaker,
                                                target_speaker,
                                                source_list,
                                                converted_data_dir,
                                                metadata_name,
                                                skip_existing=skip,
                                                len_crop=args.len_crop)