"--syn_models_dir", type=Path, default="synthesizer/saved_models", help="Directory containing saved synthesizer models") parser.add_argument("-v", "--voc_models_dir", type=Path, default="vocoder/saved_models", help="Directory containing saved vocoder models") parser.add_argument("--cpu", action="store_true", help=\ "If True, processing is done on CPU, even when a GPU is available.") parser.add_argument("--seed", type=int, default=None, help=\ "Optional random number seed value to make toolbox deterministic.") parser.add_argument("--no_mp3_support", action="store_true", help=\ "If True, no mp3 files are allowed.") args = parser.parse_args() print_args(args, parser) if args.cpu: # Hide GPUs from Pytorch to force CPU processing os.environ["CUDA_VISIBLE_DEVICES"] = "-1" del args.cpu ## Remind the user to download pretrained models if needed check_model_paths(encoder_path=args.enc_models_dir, synthesizer_path=args.syn_models_dir, vocoder_path=args.voc_models_dir) # Launch the toolbox Toolbox(**vars(args))
if torch.cuda.is_available():
    device_id = torch.cuda.current_device()
    gpu_properties = torch.cuda.get_device_properties(device_id)
    ## Print some environment information (for debugging purposes)
    print(
        "Found %d GPUs available. Using GPU %d (%s) of compute capability %d.%d with "
        "%.1fGb total memory.\n" %
        (torch.cuda.device_count(),
         device_id,
         gpu_properties.name,
         gpu_properties.major,
         gpu_properties.minor,
         gpu_properties.total_memory / 1e9))
else:
    print("Using CPU for inference.\n")

## Remind the user to download pretrained models if needed
check_model_paths(encoder_path=args.enc_model_fpath,
                  synthesizer_path=args.syn_model_fpath,
                  vocoder_path=args.voc_model_fpath)

## Load the models one by one.
print("Preparing the encoder, the synthesizer and the vocoder...")
encoder.load_model(args.enc_model_fpath)
synthesizer = Synthesizer(args.syn_model_fpath)
vocoder.load_model(args.voc_model_fpath)

## Run a test
print("Testing your configuration with small inputs.")
# Forward an audio waveform of zeroes that lasts 1 second. Notice how we can get the encoder's
# sampling rate, which may differ.
# If you're unfamiliar with digital audio, know that it is encoded as an array of floats
# (or sometimes integers, but mostly floats in this projects) ranging from -1 to 1.
# The sampling rate is the number of values (samples) recorded per second, it is set to
from voices_dict import voices_dict

# Sample text used as a default synthesis input.
Lorem_Ipsum = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."

# NOTE(review): absolute, machine-specific paths to the pretrained models —
# consider making these configurable (CLI args / env vars) before sharing.
encoder_path = Path(
    "/Users/glw001/Documents/Development/voice_clone/Real-Time-Voice-Cloning/encoder/saved_models/pretrained.pt"
)
synthesizer_path = Path(
    "/Users/glw001/Documents/Development/voice_clone/Real-Time-Voice-Cloning/synthesizer/saved_models/logs-pretrained/"
)
vocoder_path = Path(
    "/Users/glw001/Documents/Development/voice_clone/Real-Time-Voice-Cloning/vocoder/saved_models/pretrained/pretrained.pt"
)

## Remind the user to download pretrained models if needed
check_model_paths(encoder_path=encoder_path,
                  synthesizer_path=synthesizer_path,
                  vocoder_path=vocoder_path)

# NOTE(review): a `global` statement is a no-op at module scope — this line only
# has an effect if this code originally lived inside a function; confirm.
global in_fpath, filenum, preprocessed_wav, embed, torch, vocoder

# Counter for output file naming; reference utterance to clone the voice from.
filenum = 0
data_path = '/Users/glw001/Documents/Development/voice_clone/LibriSpeech/train-clean-100'
in_fpath = Path(f'{data_path}/F1088-Christabel/134315/1088-134315-0002.flac')

# Fixed seed so synthesis/vocoding runs are reproducible.
seed = 694201312

# Spelling tweaks applied to input text to coax better pronunciation
# out of the synthesizer.
word_substitutions = {'do': 'doo', 'Do': 'Doo', 'NPC': 'En Pee See'}

## Load the models one by one.
print("Preparing the encoder, the synthesizer and the vocoder...")
encoder.load_model(encoder_path)