def get_model_params(self):
    self.fake_data = Generator(self.batchSize, self.noise_z)
    if self.flags.useDualDisc:
        print("use dual disc!")
        self.disc_real, self.real_h = Discriminator(self.real_data, self.keep_prob,
                                                    name='Discriminator2', useBias=False)
        self.disc_fake, self.fake_h = Discriminator(self.fake_data, self.keep_prob)
        self.gen_params = lib.params_with_name('Generator')
        self.disc_params = (lib.params_with_name('Discriminator') +
                            lib.params_with_name('Discriminator2'))
    else:
        self.disc_real, self.real_h = Discriminator(self.real_data, self.keep_prob)
        self.disc_fake, self.fake_h = Discriminator(self.fake_data, self.keep_prob)
        self.gen_params = lib.params_with_name('Generator')
        self.disc_params = lib.params_with_name('Discriminator')
    print(self.disc_params)
    print(self.gen_params)

    # For saving samples
    self.saver = tf.train.Saver(max_to_keep=100)
    self.fixed_noise = tf.constant(np.random.normal(size=(128, 128)).astype('float32'))
    self.fixed_noise_samples = Generator(128, noise=self.fixed_noise)
def create_nets(opt):
    def _get_state_path(net_name):
        """Example: runs/default/netG_A2B.pth"""
        _run_dir = os.path.join(opt.root_dir, 'runs', opt.run_id)
        return os.path.join(_run_dir, f'{net_name}.pth')

    netG_A2B = Generator(opt.input_nc, opt.output_nc,
                         n_residual_blocks=opt.n_res_blocks,
                         use_mask=opt.use_mask)
    netG_B2A = Generator(opt.output_nc, opt.input_nc,
                         n_residual_blocks=opt.n_res_blocks,
                         use_mask=opt.use_mask)

    if opt.cuda:
        print('Converting networks to CUDA')
        netG_A2B.cuda()
        netG_B2A.cuda()

    # Load state dicts
    netG_A2B.load_state_dict(torch.load(_get_state_path('netG_A2B'), map_location='cpu'))
    netG_B2A.load_state_dict(torch.load(_get_state_path('netG_B2A'), map_location='cpu'))

    # Set models to test mode
    netG_A2B.eval()
    netG_B2A.eval()

    return netG_A2B, netG_B2A
def build_models(hps, current_res_w, use_ema_sampling=False,
                 num_classes=None, label_list=None):
    # TODO: fix num_classes
    mapping_network = MappingNetwork() if hps.do_mapping_network else None
    gen_model = Generator(current_res_w, hps.res_w,
                          use_pixel_norm=hps.do_pixel_norm,
                          start_shape=(hps.start_res_h, hps.start_res_w),
                          equalized_lr=hps.do_equalized_lr,
                          traditional_input=hps.do_traditional_input,
                          add_noise=hps.do_add_noise,
                          resize_method=hps.resize_method,
                          use_mapping_network=hps.do_mapping_network,
                          cond_layers=hps.cond_layers,
                          map_cond=hps.map_cond)
    dis_model = Discriminator(current_res_w,
                              equalized_lr=hps.do_equalized_lr,
                              do_minibatch_stddev=hps.do_minibatch_stddev,
                              end_shape=(hps.start_res_h, hps.start_res_w),
                              resize_method=hps.resize_method,
                              cgan_nclasses=num_classes,
                              label_list=label_list)
    if use_ema_sampling:
        # A second generator with identical settings holds the EMA weights used for sampling
        sampling_model = Generator(current_res_w, hps.res_w,
                                   use_pixel_norm=hps.do_pixel_norm,
                                   start_shape=(hps.start_res_h, hps.start_res_w),
                                   equalized_lr=hps.do_equalized_lr,
                                   traditional_input=hps.do_traditional_input,
                                   add_noise=hps.do_add_noise,
                                   resize_method=hps.resize_method,
                                   use_mapping_network=hps.do_mapping_network,
                                   cond_layers=hps.cond_layers,
                                   map_cond=hps.map_cond)
        return gen_model, mapping_network, dis_model, sampling_model
    else:
        return gen_model, mapping_network, dis_model
def loadModel(options):
    GEN_A2B = Generator(inC, outC)
    GEN_B2A = Generator(outC, inC)
    if options["cuda"]:
        GEN_A2B.cuda()
        GEN_B2A.cuda()
    GEN_A2B.load_state_dict(torch.load(options["GEN_A2B"]))
    GEN_B2A.load_state_dict(torch.load(options["GEN_B2A"]))
    GEN_A2B.eval()
    GEN_B2A.eval()
    return GEN_A2B, GEN_B2A
def inference():
    # Inference Path #
    make_dirs(config.inference_path)

    # Prepare Data Loader #
    val_loader = get_edges2shoes_loader(purpose='val', batch_size=config.val_batch_size)

    # Prepare Generators #
    G_A2B = Generator().to(device)
    G_B2A = Generator().to(device)

    G_A2B.load_state_dict(torch.load(os.path.join(
        config.weights_path,
        'DiscoGAN_Generator_A2B_Epoch_{}.pkl'.format(config.num_epochs))))
    G_B2A.load_state_dict(torch.load(os.path.join(
        config.weights_path,
        'DiscoGAN_Generator_B2A_Epoch_{}.pkl'.format(config.num_epochs))))

    # Test #
    print("DiscoGAN | Generating Edges2Shoes images started...")
    for i, (real_A, real_B) in enumerate(val_loader):
        # Prepare Data #
        real_A = real_A.to(device)
        real_B = real_B.to(device)

        # Generate Fake Images #
        fake_B = G_A2B(real_A)
        fake_A = G_B2A(real_B)

        # Generate Reconstructed Images #
        fake_ABA = G_B2A(fake_B)
        fake_BAB = G_A2B(fake_A)

        # Save Images #
        result = torch.cat((real_A, fake_A, fake_BAB, real_B, fake_B, fake_ABA), dim=0)
        save_image(denorm(result.data),
                   os.path.join(config.inference_path,
                                'DiscoGAN_Edges2Shoes_Results_%03d.png' % (i + 1)),
                   nrow=8, normalize=True)

    # Make a GIF file #
    make_gifs_test("DiscoGAN", config.inference_path)
def __init__(self, hparams):
    super(AgingGAN, self).__init__()
    self.genA2B = Generator(hparams['ngf'], n_residual_blocks=hparams['n_blocks'])
    self.genB2A = Generator(hparams['ngf'], n_residual_blocks=hparams['n_blocks'])
    self.disGA = Discriminator(hparams['ndf'])
    self.disGB = Discriminator(hparams['ndf'])

    # Cache for generated images
    self.generated_A = None
    self.generated_B = None
    self.real_A = None
    self.real_B = None
def _init_models(self):
    self.G_A2B = Generator(64, 9)
    self.D_B = Discriminator(self.config.image_size, 64, 4)
    self.G_A2B.apply(weights_init_normal)
    self.D_B.apply(weights_init_normal)
    self.G_A2B = torch.nn.DataParallel(self.G_A2B).to(self.device)
    self.D_B = torch.nn.DataParallel(self.D_B).to(self.device)

    self.G_B2A = Generator(64, 9)
    self.D_A = Discriminator(self.config.image_size, 64, 4)
    self.G_B2A.apply(weights_init_normal)
    self.D_A.apply(weights_init_normal)
    self.G_B2A = torch.nn.DataParallel(self.G_B2A).to(self.device)
    self.D_A = torch.nn.DataParallel(self.D_A).to(self.device)
def main():
    # Get DataLoaders
    train_fonts = []
    with open('train52_fonts.txt', 'r') as file:
        for font in file:
            train_fonts.append(font.strip())

    val_fonts = []
    with open('val52_fonts.txt', 'r') as file:
        for font in file:
            val_fonts.append(font.strip())

    train_x_loader, train_y_loader, val_loader = get_dataloaders(
        'data/jpg', 'data/jpg', train_fonts, val_fonts, BATCH_SIZE, logger=log)

    # Initialize models
    gen = Generator().to(device)
    dis = Discriminator().to(device)

    epoch = 1
    min_eval_loss = np.inf
    while epoch <= MAX_EPOCHS:
        train(gen, dis, train_x_loader, train_y_loader, epoch, lr=LR)
        eval_loss = eval(gen, val_loader, epoch)
        log.info(f'Eval Pixelwise BCE Loss: {eval_loss}')
        if eval_loss < min_eval_loss:
            # Track the best validation loss and checkpoint on improvement
            min_eval_loss = eval_loss
            save(gen, dis)
        epoch += 1
def __init__(self, args):
    self._args = args

    # Create the generator model that will predict the SMPL parameters
    self._generator = Generator(self._args)
    self._discriminator = Discriminator(self._args)

    self._gen_optim = tf.optimizers.Adam(learning_rate=self._args.gen_lr)
    self._disc_optim = tf.optimizers.Adam(learning_rate=self._args.disc_lr)

    self._data_loader = DataLoader(args)
    self._lsp_train_ds, self._lsp_test_ds = self._data_loader.load_lsp_dataset()
    self._smpl_train_ds, self._smpl_test_ds = self._data_loader.load_smpl_dataset()

    self._smpl_model = SMPL(self._args)
    self._create_summary_writer()

    # Set up saving and restoring of the model
    self._ckpt = tf.train.Checkpoint(generator=self._generator,
                                     discriminator=self._discriminator,
                                     gen_optim=self._gen_optim,
                                     disc_optim=self._disc_optim)
    self._ckpt_manager = tf.train.CheckpointManager(self._ckpt,
                                                    directory=self._args.model_dir,
                                                    max_to_keep=3)
    if self._args.load_model:
        self._ckpt.restore(self._ckpt_manager.latest_checkpoint)

    self._load_train_data()
def main():
    args = parser.parse_args()
    src_dir = os.path.join(DATA_DIR, 'noised_tgt')
    dst_dir = os.path.join(DATA_DIR, 'tmp')

    # Clear stale outputs
    for name in os.listdir(dst_dir):
        if name.endswith('.npy'):
            os.remove(os.path.join(dst_dir, name))

    # Model
    snapshot = torch.load(args.file, map_location=lambda s, _: s)
    model = Generator(snapshot['channels'])
    model.load_state_dict(snapshot['model'])
    if args.gpu is not None:
        model.cuda(args.gpu)

    # Generate
    for name in sorted(os.listdir(src_dir)):
        if not name.endswith('.npy'):
            continue
        src = os.path.join(src_dir, name)
        dst = os.path.join(dst_dir, name[7:])  # drop the 7-char filename prefix (presumably 'noised_')
        generate(model, src, dst, args.gpu)

    # Archive
    with zipfile.ZipFile('submission.zip', 'w') as zip_writer:
        for name in sorted(os.listdir(dst_dir)):
            zip_writer.write(os.path.join(dst_dir, name), name)
def loadModel(epoch="0", directory='./checkpoints/'):
    """Return a pretrained Generator model for sampling."""
    # Sanity checks on input
    if not os.path.exists(directory):
        raise ValueError("directory not found: %s" % directory)
    epoch = str(epoch)
    try:
        int(epoch)
    except ValueError:
        print("Non-numeric epoch specification")

    # Sort checkpoints by their numeric epoch suffix, not lexicographically
    listG = sorted(glob.glob(directory + "G*.pth"),
                   key=lambda path: int(re.findall(r'\d+', path)[-1]))
    if len(listG) == 0:
        print("[*] No checkpoints found!")
        return 1

    # Pick the newest checkpoint saved at or before the requested epoch
    ckp_file = ""
    numbers = [re.findall(r'\d+', path)[-1] for path in listG]
    for i in range(len(numbers)):
        if int(epoch) < int(numbers[i]):
            ckp_file = listG[max(i - 1, 0)]
            break
    if not ckp_file:
        ckp_file = listG[-1]

    G = Generator()
    gState = torch.load(ckp_file, map_location='cpu')
    G.load_state_dict(gState)
    return G
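# A minimal usage sketch for loadModel above. The latent dimension and batch
# size are illustrative assumptions; the snippet does not show Generator's
# actual input shape.
def sample_from_checkpoint(epoch=100, n_samples=16, latent_dim=100):
    G = loadModel(epoch=epoch)
    z = torch.randn(n_samples, latent_dim)  # hypothetical latent size
    with torch.no_grad():
        samples = G(z)
    return samples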
def main(args):
    img_shape = (args.channels, args.img_size, args.img_size)
    cuda = torch.cuda.is_available()

    # Loss function
    adversarial_loss = torch.nn.BCELoss()

    # Initialize generator and discriminator
    generator = Generator(img_shape)
    discriminator = Discriminator()

    if cuda:
        generator.cuda()
        discriminator.cuda()
        adversarial_loss.cuda()

    dataloader = torch.utils.data.DataLoader(
        datasets.MNIST(
            "../../data/mnist",
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.Resize(args.img_size),
                transforms.ToTensor(),
                transforms.Normalize([0.5], [0.5]),
            ]),
        ),
        batch_size=args.batch_size,
        shuffle=True,
    )

    train(generator, discriminator, dataloader, args, cuda, adversarial_loss)
def setup_train_generator(self, model_file_path=None):
    generator = Generator(num_embeddings=config.vocab_size,  # 4999
                          embedding_dim=config.emb_dim,      # 128
                          n_labels=config.vocab_size,        # 4999
                          pad_length=config.padding,         # 20
                          encoder_units=config.hidden_dim,   # 256
                          decoder_units=config.hidden_dim,   # 256
                          )
    model = generator.model()
    model.summary()
    # compile() takes no `lr` kwarg; set the rate on the optimizer instead
    # (assumes keras.optimizers.Adagrad is imported)
    model.compile(optimizer=Adagrad(lr=config.lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print('Generator compiled.')
    try:
        model.fit_generator(generator=self.train_batcher.next_batch(),
                            samples_per_epoch=5,
                            validation_data=self.val_batcher.next_batch(),
                            callbacks=[cp],
                            verbose=1,
                            nb_val_samples=1,
                            nb_epoch=config.max_iterations)
    except KeyboardInterrupt:
        print('Generator training stopped early.')
    print('Generator training complete.')
def generate_faces():
    # Inference Path #
    make_dirs(config.inference_path)

    # Prepare Generator #
    G = Generator().to(device)
    G.load_state_dict(torch.load(os.path.join(
        config.weights_path,
        'Face_Generator_Epoch_{}.pkl'.format(config.num_epochs))))
    G.eval()

    # Start Generating Faces #
    count = 1
    while True:
        # Sample Noise and Generate #
        noise = torch.randn(config.batch_size, config.noise_dim).to(device)
        generated = G.generate(noise)

        for i in range(config.batch_size):
            save_image(denorm(generated[i].data),
                       os.path.join(config.inference_path,
                                    "Generated_CelebA_Faces_{}.png".format(count)))
            count += 1

        if count > config.limit:
            print("Generating fake CelebA faces is finished.")
            break
def get_model(latent_dim, img_shape, model_type="fc"):
    if model_type == "fc":
        generator = Generator(latent_dim=latent_dim, img_shape=img_shape)
        discriminator = Discriminator(img_shape=img_shape)
        return generator, discriminator
    else:
        raise ValueError(f"unknown model_type: {model_type}")
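# A minimal usage sketch for get_model above. The latent size and the
# MNIST-like image shape are illustrative assumptions, not values taken
# from this snippet.
generator, discriminator = get_model(latent_dim=100, img_shape=(1, 28, 28), model_type="fc")
z = torch.randn(64, 100)     # hypothetical batch of latent vectors
fake = generator(z)          # expected shape: (64, 1, 28, 28)
score = discriminator(fake)  # discriminator's realness score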
def initialize(
    config: Optional[Any], num_channels: int
) -> Tuple[Module, Module, Optimizer, Optimizer, Module, Optional[Union[_LRScheduler, ParamScheduler]]]:
    """Initialize the models, optimizers, loss function, and lr scheduler.

    Parameters
    ----------
    config
        config object
    num_channels
        number of channels for Generator

    Returns
    -------
    netD, netG, optimizerD, optimizerG, loss_fn, lr_scheduler
    """
    netG = idist.auto_model(Generator(config.z_dim, config.g_filters, num_channels))
    netD = idist.auto_model(Discriminator(num_channels, config.d_filters))

    loss_fn = nn.BCELoss().to(idist.device())

    optimizerG = optim.Adam(netG.parameters(), lr=config.lr, betas=(config.beta_1, 0.999))
    optimizerD = optim.Adam(netD.parameters(), lr=config.lr, betas=(config.beta_1, 0.999))

    # No lr scheduler is configured yet, so the last slot is None
    return netD, netG, optimizerD, optimizerG, loss_fn, None
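# A minimal usage sketch for initialize above. Note the return order (netD
# before netG) and that the lr-scheduler slot is currently always None;
# `config` is assumed to carry z_dim, g_filters, d_filters, lr, and beta_1.
netD, netG, optimizerD, optimizerG, loss_fn, lr_scheduler = initialize(config, num_channels=3)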
def main():
    print('Initializing Training Process...')

    parser = argparse.ArgumentParser()
    parser.add_argument('--group_name', default=None)
    parser.add_argument('--checkpoint_path', default='cp_hifigan')
    parser.add_argument('--config', default='config_8k.json')
    parser.add_argument('--training_epochs', default=3100, type=int)
    parser.add_argument('--stdout_interval', default=5, type=int)
    parser.add_argument('--checkpoint_interval', default=5000, type=int)
    parser.add_argument('--summary_interval', default=100, type=int)
    parser.add_argument('--validation_interval', default=1000, type=int)
    # argparse's type=bool treats any non-empty string as True, so use a flag instead
    parser.add_argument('--fine_tuning', default=False, action='store_true')
    a = parser.parse_args()

    with open(a.config) as f:
        data = f.read()

    json_config = json.loads(data)
    h = AttrDict(json_config)
    build_env(a.config, 'config.json', a.checkpoint_path)

    # Smoke-test the generator with a dummy mel batch of shape (batch, n_mels, frames)
    model = Generator(h)
    inputs = torch.randn(10, 80, 80)
    output = model(inputs)
    print(output.shape)
def _init_models(self):
    self.generator = Generator(self.config.class_num,
                               self.config.conv_dim,
                               self.config.layer_num)
    self.discriminator = Discriminator(self.config.image_size,
                                       self.config.conv_dim,
                                       self.config.layer_num,
                                       self.config.class_num)
    self.generator = torch.nn.DataParallel(self.generator).to(self.device)
    self.discriminator = torch.nn.DataParallel(self.discriminator).to(self.device)
def __init__(self, F, weights_initializer=tcl.xavier_initializer(), regularizer=None):
    '''
    Args:
        F: Contains the parameters (FLAGS) for configuring the model
    '''
    self.F = F
    self.weights_initializer = weights_initializer
    self.regularizer = regularizer
    self.D = Discriminator(is_training=F.is_training,
                           weights_initializer=weights_initializer)
    self.G = Generator(F.output_height, F.output_width,
                       is_training=F.is_training,
                       weights_initializer=weights_initializer)
    if F.is_training:
        self.epoch_id = 1
        self.batch_id = 1
        # We expect no remainder, so the calculated `batch_id` stays precise
        self.n_batch = F.train_size // F.batch_size
        # Width parameter used to format verbose output
        self.batch_width = len(str(self.n_batch))
        if F.training_strategy == 1:
            self.step_width = len(str(F.epoch * self.n_batch * (1 + F.g_step / F.d_step)))
        elif F.training_strategy == 2:
            self.step_width = len(str(F.epoch * self.n_batch * (F.d_step + F.g_step)))
def inference(a):
    generator = Generator(h).to(device)

    state_dict_g = load_checkpoint(a.checkpoint_file, device)
    generator.load_state_dict(state_dict_g["generator"])

    filelist = os.listdir(a.input_mels_dir)
    os.makedirs(a.output_dir, exist_ok=True)

    generator.eval()
    generator.remove_weight_norm()
    with torch.no_grad():
        for i, filename in enumerate(filelist):
            if not filename.endswith(".npy"):
                continue
            x = np.load(os.path.join(a.input_mels_dir, filename))
            x = torch.FloatTensor(x).to(device)
            y_g_hat = generator(x)
            audio = y_g_hat.squeeze()
            audio = audio * MAX_WAV_VALUE
            audio = audio.cpu().numpy().astype("int16")
            output_file = os.path.join(a.output_dir, os.path.splitext(filename)[0] + ".wav")
            write(output_file, h.sampling_rate, audio)
            print(output_file)
def buildModel(self, optimizer="adam"):
    self.G = Generator()
    self.D = Discriminator()
    self.I = InceptionExtractor()
    self.I.eval()  # don't train the InceptionExtractor

    if self.useCuda:
        self.G.to(self.device)
        self.D.to(self.device)
        self.I.to(self.device)

    if optimizer == "adam":
        # The discriminator uses 4x the generator's learning rate (a two time-scale setup)
        self.gOptim = optim.Adam(self.G.parameters(), lr=self.learningRate,
                                 betas=(0.0, 0.9))
        self.dOptim = optim.Adam(self.D.parameters(), lr=4 * self.learningRate,
                                 betas=(0.0, 0.9))
    elif optimizer == "rms":
        self.gOptim = optim.RMSprop(self.G.parameters(), lr=self.learningRate)
        self.dOptim = optim.RMSprop(self.D.parameters(), lr=4 * self.learningRate)
    else:
        print("Unrecognized optimizer!")
        return 0
def setup_train_wgan_model(self):
    generator = Generator(num_embeddings=config.vocab_size,   # 4999
                          embedding_dim=config.emb_dim,       # 128
                          n_labels=config.vocab_size,         # 4999
                          pad_length=config.padding,          # 20
                          encoder_units=config.hidden_dim,    # 256
                          decoder_units=config.hidden_dim,    # 256
                          ).model()
    reconstructor = Reconstructor(num_embeddings=config.vocab_size,  # 4999
                                  embedding_dim=config.emb_dim,      # 128
                                  n_labels=config.vocab_size,        # 4999
                                  pad_length=config.padding,         # 20
                                  encoder_units=config.hidden_dim,   # 256
                                  decoder_units=config.hidden_dim,   # 256
                                  ).model()
    discriminator = Discriminator().model()

    wgan = WGAN(generator=generator,
                reconstructor=reconstructor,
                discriminator=discriminator)
    try:
        wgan.train(self.train_batcher.next_batch())
    except KeyboardInterrupt:
        print('WGAN training stopped early.')
    print('WGAN training complete.')
def main():
    config = Config()
    print("Processing text for '%s'." % config.text_file)
    data = preprocess.preprocess(config.text_file, 'infer', config)
    dataloader = dataprocess.load_infer(data, config)

    G = Generator(config)
    G.load_state_dict(load_weights(config.checkpoint_file))
    G = set_device(G, config.device, config.use_cpu)
    G.eval()

    print("Generating spectrogram with '%s'." % config.checkpoint_file)
    spec = []
    y_prev = torch.zeros(1, config.prev_length, config.fft_size // 2 + 1)
    for x in tqdm(dataloader, leave=False, ascii=True):
        x, y_prev = set_device((x, y_prev), config.device, config.use_cpu)
        y_gen = G(x, y_prev)
        y_gen = y_gen.squeeze(1)
        y_prev = y_gen[:, -config.prev_length:, :]
        spec.append(y_gen.data)

    print("Generating audio with the Griffin-Lim algorithm.")
    spec = torch.cat(spec, dim=1).transpose(1, 2)  # T x D -> D x T
    wave = dsp.inv_spectrogram(spec, config)

    savename = config.checkpoint_file.replace('.pt', '_') + \
        os.path.basename(config.text_file).replace('.txt', '.wav')
    dsp.save(savename, wave, config.sample_rate)
    print("Audio saved to '%s'." % savename)
def __init__(self, channel=3, latent_dim=500, nb_growing=8, gp_lambda=10,
             d_norm_eps=1e-3, upsampling='subpixel', downsampling='stride',
             lr_d=1e-4, lr_g=1e-4):
    self.channel = channel
    self.lr_d = lr_d
    self.lr_g = lr_g
    self.nb_growing = nb_growing
    self.discriminator = Discriminator(nb_growing=nb_growing,
                                       downsampling=downsampling,
                                       channel=self.channel)
    self.generator = Generator(nb_growing=nb_growing,
                               upsampling_=upsampling,
                               channel=self.channel)
    self.latent_dim = latent_dim
    self.z = Input((self.latent_dim,), name='z')
    self.bs = tf.placeholder(tf.int32, shape=[])
    self.gp_lambda = gp_lambda
    self.d_norm_eps = d_norm_eps
    self.sess = None
    self.saver = None
    self.fake = None
def inference(a, h):
    generator = Generator(h).to(device)

    state_dict_g = load_checkpoint(a.checkpoint_file, device)
    generator.load_state_dict(state_dict_g['generator'])

    os.makedirs(a.output_dir, exist_ok=True)

    generator.eval()
    generator.remove_weight_norm()
    with torch.no_grad():
        for i, fpath in enumerate(glob.glob(os.path.join(a.input_wavs_dir, "*.wav"))):
            filename = os.path.split(fpath)[-1]
            wav, sr = load_wav(fpath)
            wav = wav / MAX_WAV_VALUE
            wav = torch.FloatTensor(wav).to(device)
            x = get_mel(wav.unsqueeze(0))
            y_g_hat = generator(x)
            audio = y_g_hat.squeeze()
            audio = audio * MAX_WAV_VALUE
            audio = audio.cpu().numpy().astype('int16')
            output_file = os.path.join(a.output_dir,
                                       os.path.splitext(filename)[0] + '_generated.wav')
            write(output_file, h.sampling_rate, audio)
            print(output_file)
def eval_ckpt():
    parser = argparse.ArgumentParser()
    parser.add_argument("config_paths", nargs="+", help="path to config.yaml")
    parser.add_argument("--weight", help="path to the .pth weight to evaluate")
    parser.add_argument("--result_dir", help="path to save the result files")
    args, left_argv = parser.parse_known_args()

    cfg = Config(*args.config_paths, default="cfgs/defaults.yaml")
    cfg.argv_update(left_argv)

    img_dir = Path(args.result_dir)
    img_dir.mkdir(parents=True, exist_ok=True)

    trn_transform, val_transform = setup_transforms(cfg)

    g_kwargs = cfg.get('g_args', {})
    gen = Generator(1, cfg.C, 1, **g_kwargs).cuda()

    weight = torch.load(args.weight)
    if "generator_ema" in weight:
        weight = weight["generator_ema"]
    gen.load_state_dict(weight)

    test_dset, test_loader = get_test_loader(cfg, val_transform)

    for batch in test_loader:
        style_imgs = batch["style_imgs"].cuda()
        char_imgs = batch["source_imgs"].unsqueeze(1).cuda()

        out = gen.gen_from_style_char(style_imgs, char_imgs)
        fonts = batch["fonts"]
        chars = batch["chars"]

        for image, font, char in zip(refine(out), fonts, chars):
            (img_dir / font).mkdir(parents=True, exist_ok=True)
            path = img_dir / font / f"{char}.png"
            save_tensor_to_image(image, path)
def validate_models(channels):
    """
    Validate trained models.

    :param channels: list of compressed-channel counts to evaluate
    :return: None
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    test_loader = test_dataloader()
    test_batch = next(iter(test_loader)).to(device)
    reconstructed_images = {}

    for num_channels in channels:
        encoder = Encoder(num_channels).to(device)
        generator = Generator(num_channels).to(device)

        encoder.load_state_dict(torch.load(f"../models/encoder_{num_channels}.model",
                                           map_location=torch.device("cpu")))
        generator.load_state_dict(torch.load(f"../models/generator_{num_channels}.model",
                                             map_location=torch.device("cpu")))
        encoder.eval()
        generator.eval()

        reconstructed_images[num_channels] = generator(encoder(test_batch))

    plot_image_grid(test_batch, reconstructed_images, NUM_IMAGES_GRID)
    save_images(test_batch, reconstructed_images)
    calculate_metric(channels)
def main():
    parser = argparse.ArgumentParser(description="PGGAN")
    parser.add_argument("--num_stages", type=int, default=3)
    parser.add_argument("--num_epochs", type=int, default=32)
    parser.add_argument("--base_channels", type=int, default=16)
    # type=list would split the argument string into characters; parse a list of ints instead
    parser.add_argument("--batch_size", type=int, nargs="+",
                        default=[32, 32, 32, 32, 32, 16, 8, 4, 2])
    parser.add_argument("--data_root", type=str, default="./data")
    parser.add_argument("--dataset", type=str, default="mnist")
    parser.add_argument("--image_size", type=int, default=32)
    parser.add_argument("--image_channels", type=int, default=1)
    parser.add_argument("--device", type=str, default="cuda")
    opt = parser.parse_args()

    generator = Generator(max_stage=opt.num_stages,
                          base_channels=opt.base_channels,
                          image_channels=opt.image_channels).to(opt.device)
    discriminator = Discriminator(max_stage=opt.num_stages,
                                  base_channels=opt.base_channels,
                                  image_channels=opt.image_channels).to(opt.device)

    train(generator, discriminator, opt)

    torch.save(generator.state_dict(), "./weights/generator.pth")
    torch.save(discriminator.state_dict(), "./weights/discriminator.pth")
def inference(a):
    generator = Generator(h).to(device)

    state_dict_g = load_checkpoint(a.checkpoint_file, device)
    generator.load_state_dict(state_dict_g['generator'])

    pqmf = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0).cuda()

    filelist = os.listdir(a.input_mels_dir)
    os.makedirs(a.output_dir, exist_ok=True)

    generator.eval()
    generator.remove_weight_norm()
    with torch.no_grad():
        for i, filename in enumerate(filelist):
            x = np.load(os.path.join(a.input_mels_dir, filename))
            x = torch.FloatTensor(x).to(device)
            y_g_hat = generator(x)
            if h.output_channel > 1:
                # Multi-band output: recombine the sub-band signals via PQMF synthesis
                y_mb_ = y_g_hat
                y_g_hat = pqmf.synthesis(y_mb_)
            audio = y_g_hat.squeeze()
            audio = audio * MAX_WAV_VALUE
            audio = audio.cpu().numpy().astype('int16')
            output_file = os.path.join(a.output_dir,
                                       os.path.splitext(filename)[0] + '_generated_e2e.wav')
            write(output_file, h.sampling_rate, audio)
            print(output_file)
def inference(a):
    generator = Generator(h).to(device)

    state_dict_g = load_checkpoint(a.checkpoint_file, device)
    generator.load_state_dict(state_dict_g['generator'])

    filelist = glob.glob(f"{a.input_mels_dir}/*.npy")
    os.makedirs(a.output_dir, exist_ok=True)

    generator.eval()
    generator.remove_weight_norm()
    with torch.no_grad():
        total_time = 0.0
        for file_path in tqdm.tqdm(filelist):
            x = np.load(file_path)
            x = torch.FloatTensor(x).to(device)
            if len(x.shape) < 3:  # for mel from vivos
                x = x.unsqueeze(0)
            start_time = time.time()
            y_g_hat = generator(x)
            audio = y_g_hat.squeeze()
            audio = audio * MAX_WAV_VALUE
            audio = audio.cpu().detach().numpy().astype('int16')
            total_time += time.time() - start_time
            file_name = os.path.basename(file_path).split('.')[0]
            output_file = os.path.join(a.output_dir, file_name + '_generated_from_mel.wav')
            write(output_file, h.sampling_rate, audio)
    print("Elapsed time:", total_time)