def adapt_f0(s, t):
    """Adapt the source pitch contour ``s`` towards the target contour ``t``.

    When the module-level flag ``use_predicted_pitch`` is set, both contours
    are handed to ``pitch_model`` and its first output row is returned as a
    NumPy array.  Otherwise every strictly positive entry of ``s`` is rescaled
    in place by ``mean(t > 0) / mean(s > 0)``; non-positive entries are left
    untouched.

    Args:
        s: source contour (NumPy array; modified in place in the mean-scaling
            branch).
        t: target contour (NumPy array).

    Returns:
        The adapted contour: the model output, or ``s`` itself after scaling.
    """
    if use_predicted_pitch:
        src = utils.to_gpu(torch.from_numpy(s)).view(1, -1, 1).float()
        tgt = utils.to_gpu(torch.from_numpy(t)).view(1, -1, 1).float()
        return pitch_model(src, tgt)[0, :].cpu().numpy()

    # Statistics are taken over strictly positive entries only.
    src_arr = np.asarray(s)
    tgt_arr = np.asarray(t)
    src_mean = src_arr[src_arr > 0].mean()
    tgt_mean = tgt_arr[tgt_arr > 0].mean()

    for idx, value in enumerate(s):
        if value > 0:
            s[idx] = value * tgt_mean / src_mean
    return s
def main(model_filename, pitch_model_filename, output_dir, batch_size):
    """Run the encoder/generator over the test set and write 16-bit WAV files.

    Args:
        model_filename: checkpoint loaded into the encoder + generator module.
        pitch_model_filename: optional pitch-model checkpoint; when the file
            exists, the module-level ``pitch_model`` is populated and
            ``use_predicted_pitch`` is switched on.
        output_dir: directory that receives ``<name>.wav`` for every item.
        batch_size: number of test items synthesised per forward pass.
    """
    global pitch_model, use_predicted_pitch

    model = torch.nn.Module()
    model.add_module('encoder', Encoder(**encoder_config))
    model.add_module('generator',
                     Generator(sum(encoder_config['n_out_channels'])))
    model = load_checkpoint(model_filename, model).cuda()
    model.eval()

    if os.path.isfile(pitch_model_filename):
        use_predicted_pitch = True
        pitch_model = PitchModel(**pitch_config)
        pitch_model = load_checkpoint(pitch_model_filename, pitch_model).cuda()
        pitch_model.eval()

    testset = TestSet(**(data_config))

    # Inference only: do not record an autograd graph for the forward passes.
    with torch.no_grad():
        for files in chunker(testset, batch_size):
            # Transpose a list of per-item tuples into per-field tuples; the
            # last field holds the output names, the rest are conditioning.
            files = list(zip(*files))
            cond_input, file_paths = files[:-1], files[-1]
            cond_input = [
                utils.to_gpu(torch.from_numpy(np.stack(x))).float()
                for x in cond_input
            ]
            cond_input = model.encoder(cond_input[0])
            audio = model.generator(cond_input)

            for i, file_path in enumerate(file_paths):
                print("writing {}".format(file_path))
                wav = audio[i].cpu().squeeze().detach().numpy() * 32768.0
                # Clamp before the cast: a full-scale sample (>= 1.0) would
                # otherwise wrap around to a large negative int16 value.
                wav = np.clip(wav, -32768, 32767)
                write("{}/{}.wav".format(output_dir, file_path),
                      data_config['sampling_rate'], wav.astype(np.int16))
def get_noise(self, num_samples=None):
    """Draw a batch of standard-normal latent vectors.

    Args:
        num_samples: number of rows to draw; ``self.cfg.batch_size`` is used
            when this is ``None``.

    Returns:
        A ``Variable`` of shape ``(num_samples, self.cfg.z_size)`` that was
        passed through ``to_gpu`` and then filled in place with N(0, 1) draws.
    """
    rows = self.cfg.batch_size if num_samples is None else num_samples
    latent = to_gpu(self.cfg.cuda, Variable(torch.ones(rows, self.cfg.z_size)))
    # The placeholder ones are overwritten in place after the device move.
    latent.data.normal_(0, 1)
    return latent
def _decode_free_run(self, code_w, max_len):
    """Decode ``max_len`` steps, feeding each predicted token back as input.

    Args:
        code_w: code tensor; a length-1 axis is inserted at dim 1 and the
            result is concatenated to the input embedding at every step.
        max_len: number of decoding steps.

    Returns:
        ``self.packer_w.new(probs=..., ids=...)`` with the per-step results
        concatenated along dim 1, plus ``embeds=...`` when
        ``self.cfg.dec_embed`` is set.
    """
    use_embed = self.cfg.dec_embed

    code_w = code_w.unsqueeze(1)
    batch_size = code_w.size(0)

    # First decoder input is the <sos> embedding.
    step_input = self.embed_w(self._get_sos_batch(batch_size, self.vocab_w))
    state_w = self._init_hidden(batch_size, self.cfg.hidden_size_w)

    step_embeds = []  # only filled when decoding in embedding space
    step_probs = []
    step_ids = []

    finished = to_gpu(
        self.cfg.cuda,
        Variable(torch.ByteTensor(batch_size, 1).zero_(), requires_grad=False))

    for _ in range(max_len):
        decoder_in = torch.cat([step_input, code_w], 2)
        output_w, state_w = self.decoder(decoder_in, state_w)

        if use_embed:
            embed_out_w = self.linear_w(output_w)
            cosim_w = self._compute_cosine_sim(embed_out_w,
                                               self.embed_w.embed)
            prob_w = F.log_softmax(cosim_w * self.cfg.embed_temp, 2)
            _, id_w = torch.max(cosim_w, 2)
            # Positions after an already-emitted <eos> are padded.
            id_w, embed_out_w, finished = self._pads_after_eos(
                id_w, embed_out_w, finished)
            step_embeds.append(embed_out_w)
        else:
            prob_w = F.log_softmax(self.linear_w(output_w), 2)
            _, id_w = torch.max(prob_w, 2)
            id_w, finished = self._pad_ids_after_eos(id_w, finished)

        # The next input is the embedding of the chosen id (not the soft
        # probabilities).
        step_input = self.embed_w(id_w)

        step_probs.append(prob_w)
        step_ids.append(id_w)

    packed = {
        'probs': torch.cat(step_probs, 1),
        'ids': torch.cat(step_ids, 1),
    }
    if use_embed:
        packed['embeds'] = torch.cat(step_embeds, 1)
    return self.packer_w.new(**packed)
def _get_interpolated_z(self, num_samples):
    """Return ``num_samples`` latent vectors on a line between two random points.

    Two endpoints are drawn from N(0, 1); row ``k`` of the result is
    ``start + k * (end - start) / num_samples`` for ``k`` in
    ``0 .. num_samples - 1``, so the second endpoint itself is not included.

    Returns:
        A float ``Variable`` of shape ``(num_samples, self.cfg.z_size)``
        passed through ``to_gpu``.
    """
    z_size = self.cfg.z_size
    start = np.random.normal(0, 1, (1, z_size))
    end = np.random.normal(0, 1, (1, z_size))

    step = (end - start) / num_samples
    rows = []
    for k in range(num_samples):
        rows.append(start + step * k)

    stacked = torch.FloatTensor(np.vstack(rows))
    return to_gpu(self.cfg.cuda, Variable(stacked))
def _decode_from_z(self, z):
    """Generate a code from latent ``z`` and decode it in free-running mode.

    Args:
        z: array-like latent batch accepted by ``torch.FloatTensor``.

    Returns:
        Whatever ``self.net.dec.free_running`` returns for the generated code
        and ``self.cfg.max_len``.
    """
    # The modules are explicitly switched to train mode before decoding.
    self.net.set_modules_train_mode(True)

    latent = to_gpu(self.cfg.cuda, Variable(torch.FloatTensor(z)))
    generated_code = self.net.gen(latent)
    return self.net.dec.free_running(generated_code, self.cfg.max_len)
def __init__(self, alpha=0, class_weights=None, num_classes=1):
    """Set up the NLL criterion and store the loss hyper-parameters.

    Args:
        alpha: stored as ``self.alpha``.
        class_weights: optional NumPy array of per-class weights; converted to
            a float32 tensor, passed through ``to_gpu`` and given to
            ``nn.NLLLoss``.  ``None`` means unweighted.
        num_classes: stored as ``self.num_classes``.
    """
    weight = None
    if class_weights is not None:
        as_float32 = class_weights.astype(dtype=np.float32)
        weight = to_gpu(torch.from_numpy(as_float32))

    self.nll_loss = nn.NLLLoss(weight=weight)
    self.alpha = alpha
    self.num_classes = num_classes
def to_one_hot(cfg, indices, num_class):
    """One-hot encode ``indices`` along a new trailing axis.

    Args:
        cfg: configuration object; only ``cfg.cuda`` is read.
        indices: integer tensor of class ids, any shape.
        num_class: size of the new trailing axis.

    Returns:
        A float tensor of shape ``(*indices.size(), num_class)`` holding 1.0
        at each index and 0.0 elsewhere; moved with ``to_gpu`` when
        ``cfg.cuda`` is truthy.
    """
    shape = indices.size()
    last_axis = len(shape)

    # scatter_ needs an index tensor with the same rank as the output.
    index = torch.unsqueeze(indices.data, last_axis)
    encoded = torch.FloatTensor(*shape, num_class).zero_()

    if isinstance(index, Variable):
        encoded = Variable(encoded, requires_grad=False)
    if cfg.cuda:
        encoded = to_gpu(cfg.cuda, encoded)

    encoded.scatter_(last_axis, index, 1.)
    return encoded
def forward(self, noise):
    """Pass ``noise`` through every module in ``self.layers``.

    Args:
        noise: input batch; its second dimension must equal
            ``self.cfg.z_size``.

    Returns:
        The output of the last layer, with N(0, 0.1) noise added when
        ``self._with_noise`` is truthy.
    """
    assert noise.size(1) == self.cfg.z_size
    x = noise
    for i, layer in enumerate(self.layers):
        x = layer(x)
    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # confirm the noise is added once after all layers, not once per layer.
    if self._with_noise:
        # Zero-mean Gaussian noise (std 0.1) with the same shape as x.
        noise = torch.normal(mean=torch.zeros(x.size()), std=0.1)
        noise = to_gpu(self.cfg.cuda, Variable(noise))
        x = x + noise
    # NOTE(review): this binds a local that is never read, so it has no
    # effect; it may have been meant as ``self._with_noise = False`` -- confirm.
    with_noise = False
    return x
def parse_batch(self, batch):
    """Move a collated batch to the device and split it into inputs/targets.

    Args:
        batch: 9-tuple ``(current_text, input_lengths, pre_text, pre_text_len,
            post_text, post_text_len, mel, gate, output_lengths)``.

    Returns:
        ``(x, y)`` where ``x`` is ``(current_text, input_lengths, pre_text,
        pre_text_len, post_text, post_text_len, mel, max_len,
        output_lengths)`` and ``y`` is ``(mel, gate)``.  Text and length
        tensors are cast to long, mel and gate to float; ``max_len`` is the
        largest value in ``input_lengths`` as a Python number.
    """
    def _as_long(tensor):
        return to_gpu(tensor).long()

    def _as_float(tensor):
        return to_gpu(tensor).float()

    (text, text_len, pre_text, pre_len, post_text, post_len,
     mel, gate, mel_len) = batch

    text = _as_long(text)
    text_len = _as_long(text_len)
    pre_text = _as_long(pre_text)
    pre_len = _as_long(pre_len)
    post_text = _as_long(post_text)
    post_len = _as_long(post_len)
    max_len = torch.max(text_len.data).item()
    mel = _as_float(mel)
    gate = _as_float(gate)
    mel_len = _as_long(mel_len)

    model_inputs = (text, text_len, pre_text, pre_len, post_text, post_len,
                    mel, max_len, mel_len)
    targets = (mel, gate)
    return (model_inputs, targets)
def parse_batch(self, batch):
    """Move every tensor of a collated batch to the device with its dtype.

    Args:
        batch: triple ``(inputs, alignments, inputs_ctc)``; ``alignments`` and
            ``inputs_ctc`` may each be ``None``.

    Returns:
        ``(inputs, alignments, inputs_ctc)`` where ``inputs`` is rebuilt as a
        ``utl.Inputs`` (text / lengths as long, mels / gate as float),
        ``alignments`` is a float tensor or ``None`` and ``inputs_ctc`` is a
        rebuilt ``utl.InputsCTC`` (long fields) or ``None``.
    """
    inputs, alignments, inputs_ctc = batch

    inputs = utl.Inputs(text=utl.to_gpu(inputs.text).long(),
                        mels=utl.to_gpu(inputs.mels).float(),
                        gate=utl.to_gpu(inputs.gate).float(),
                        text_len=utl.to_gpu(inputs.text_len).long(),
                        mel_len=utl.to_gpu(inputs.mel_len).long())

    if alignments is not None:
        # Use the unpacked batch element: ``inputs`` was just rebuilt from
        # five fields only and does not carry the alignments.
        alignments = utl.to_gpu(alignments).float()

    if inputs_ctc is not None:
        inputs_ctc = utl.InputsCTC(
            text=utl.to_gpu(inputs_ctc.text).long(),
            length=utl.to_gpu(inputs_ctc.length).long())

    return inputs, alignments, inputs_ctc
def __init__(self, net):
    """Prepare training state for ``net`` and run the training loop.

    Construction blocks until ``self.sv.is_end_of_training()`` is true;
    ``self.train_loop`` is invoked repeatedly until then.

    Args:
        net: network bundle exposing ``cfg``.
    """
    log.info("Training start!")

    cfg = net.cfg
    self.net = net
    self.cfg = cfg
    self.test_sents = load_test_data(cfg)

    # Constant +1 / -1 one-element tensors.
    one = to_gpu(cfg.cuda, torch.FloatTensor([1]))
    self.pos_one = one
    self.neg_one = one * (-1)

    self.result = ResultWriter(cfg)
    self.sv = TrainingSupervisor(net, self.result)

    while True:
        if self.sv.is_end_of_training():
            break
        self.train_loop(self.cfg, self.net, self.sv)
def __init__(self, net):
    """Prepare evaluation state for ``net`` and run the test loop.

    Construction blocks until ``self.test_loop`` returns a truthy value.

    Args:
        net: network bundle exposing ``cfg``.
    """
    log.info("Testing start!")

    cfg = net.cfg
    self.net = net
    self.cfg = cfg
    self.test_sents = load_test_data(cfg)

    # Constant +1 / -1 one-element tensors.
    one = to_gpu(cfg.cuda, torch.FloatTensor([1]))
    self.pos_one = one
    self.neg_one = one * (-1)

    self.result = ResultWriter(cfg)
    self.sv = TestingSupervisor(net, self.result)

    self.num_sample = 10
    self.max_sample = 64

    spacy_en = spacy.load('en')

    def _tokenize(s):
        # Split a raw string into token texts with the spaCy tokenizer.
        return [tok.text for tok in spacy_en.tokenizer(s)]

    self.tokenizer = _tokenize

    while not self.test_loop(self.cfg, self.net, self.sv):
        pass
def forward(self, enc_h):
    """Turn an encoder state into a code, optionally sampling around the mean.

    ``self._mu`` is always set from ``self.mu_layers``.  When
    ``self._with_var`` is truthy, ``self.sigma_layers`` is read as a
    log-variance, ``self._logvar`` and ``self._sigma = exp(0.5 * logvar)`` are
    stored, and the code is ``mu + sigma * eps`` with ``eps`` drawn from
    N(0, 1) via NumPy.  Otherwise the code is ``mu`` and ``self._sigma`` is
    ``None``.

    Args:
        enc_h: encoder output fed to the mu / sigma layers.

    Returns:
        The code, passed through ``self._normalize`` when
        ``self.cfg.code_norm`` is set.
    """
    self._mu = mu = self.mu_layers(enc_h)
    if self._with_var:
        # sigma_layers output is treated as log-variance.
        self._logvar = logvar = self.sigma_layers(enc_h)
        self._sigma = sigma = torch.exp(logvar * 0.5)
        # The random factor is sampled with NumPy and carries no gradient.
        std = np.random.normal(0, 1, size=sigma.size())
        std = Variable(torch.from_numpy(std).float(), requires_grad=False)
        std = to_gpu(self.cfg.cuda, std)
        code = mu + sigma * std
    else:
        code = mu
        self._sigma = None
    # The flag is switched back on after every call.
    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # this line may originally have sat inside the ``else`` branch -- confirm.
    self._with_var = True
    # normalization
    if self.cfg.code_norm:
        code = self._normalize(code)
    return code
def predict_batch(models: nn.ModuleList, path2images, path2save, thresh=0.5):
    """
    Perform prediction for every image in a directory and save the overlays.

    Params:
        models      : NN models; with more than one model the sigmoid outputs
                      are averaged before thresholding
        path2images : path to a directory with input images
        path2save   : existing directory that receives the overlaid images
                      (saved under the source file name)
        thresh      : prediction threshold applied to the probabilities

    Raises:
        RuntimeError : if `path2images` or `path2save` is not a directory

    Entries that OpenCV cannot decode (sub-directories, non-image files) are
    reported and skipped instead of aborting the whole batch.
    """
    path2images = Path(path2images)
    path2save = Path(path2save)

    if not path2images.is_dir():
        raise RuntimeError("File '{}' is not dir.".format(str(path2images)))

    if not path2save.is_dir():
        raise RuntimeError("File '{}' is not dir.".format(str(path2save)))

    # Loop-invariant setup: build the transform once and switch every model
    # to evaluation mode up front.
    transform = test_trasformations()
    for model in models:
        model.eval()

    count_processed = 0
    for ip in sorted(path2images.glob("*")):
        src_img = cv2.imread(str(ip))
        if src_img is None:
            # cv2.imread signals failure by returning None, not by raising.
            print("Skipping '{}': not a readable image.".format(str(ip)))
            continue

        src_img = transform(image=src_img)["image"]

        img2predict = cv2.cvtColor(src_img.copy(),
                                   cv2.COLOR_BGR2RGB).astype(dtype=np.float32)
        img2predict = normalize(img2predict)
        img2predict = utils.to_gpu(
            numpy_to_tensor(img2predict).unsqueeze(0).contiguous()).float()

        with torch.no_grad():
            if len(models) == 1:
                # Probabilities of the single model.
                predict = torch.sigmoid(models[0](img2predict))
                predict = predict.squeeze(0).squeeze(0)
            else:
                # Average the probabilities of all models for this image.
                sum_predicts = utils.to_gpu(
                    torch.zeros(
                        (1, 1, src_img.shape[0], src_img.shape[1])).float())
                for model in models:
                    sum_predicts += torch.sigmoid(model(img2predict))
                predict = (sum_predicts /
                           len(models)).squeeze(0).squeeze(0).float()

        mask = (predict > thresh).cpu().numpy().astype(dtype=np.uint8)
        overlayed_img = alpha_overlay(src_img, mask)

        cv2.imwrite(str(path2save / "{}".format(ip.name)), overlayed_img)
        print("Image '{}' was processed successfully.".format(str(ip)))
        count_processed += 1

    print("{} images were processed.".format(count_processed))
def _init_hidden(self, bsz, nhidden):
    """Return a pair of zero tensors of shape ``(nlayers, bsz, nhidden)``.

    Both tensors are wrapped in ``Variable`` and passed through ``to_gpu``;
    ``nlayers`` comes from ``self.cfg.nlayers``.
    """
    shape = (self.cfg.nlayers, bsz, nhidden)
    first = Variable(torch.zeros(*shape))
    second = Variable(torch.zeros(*shape))
    cuda = self.cfg.cuda
    return (to_gpu(cuda, first), to_gpu(cuda, second))
def _init_state(self, bsz):
    """Return a zero tensor of shape ``(cfg.nlayers, bsz, cfg.nhidden)``.

    The tensor is wrapped in ``Variable`` and passed through ``to_gpu``.
    """
    cfg = self.cfg
    state = torch.zeros(cfg.nlayers, bsz, cfg.nhidden)
    return to_gpu(cfg.cuda, Variable(state))
def make_noise_size_of(self, *size):
    """Return a ``Variable`` of the given size filled with N(0, 1) samples.

    Args:
        *size: dimensions forwarded to ``torch.ones``.

    Returns:
        The tensor after ``to_gpu``; values are drawn in place afterwards.
    """
    placeholder = Variable(torch.ones(*size))
    sample = to_gpu(self.cfg.cuda, placeholder)
    sample.data.normal_(0, 1)
    return sample
def predict(models: nn.ModuleList, img_path, path2save, thresh=0.5):
    """
    Perform prediction for a single image, save the overlay and display it.

    Params:
        models    : NN models; with more than one model the sigmoid outputs
                    are averaged before thresholding
        img_path  : path to an image
        path2save : output file for the overlaid image (str or Path)
        thresh    : prediction threshold applied to the probabilities

    Raises:
        FileNotFoundError : if `img_path` does not exist
        RuntimeError      : if the file exists but OpenCV cannot decode it

    The call blocks in `cv2.waitKey(0)` until a key is pressed.
    """
    img_path = Path(img_path)

    if not img_path.exists():
        raise FileNotFoundError("File '{}' not found.".format(str(img_path)))

    src_img = cv2.imread(str(img_path))
    if src_img is None:
        # cv2.imread signals failure by returning None, not by raising.
        raise RuntimeError(
            "File '{}' is not a readable image.".format(str(img_path)))

    transform = test_trasformations()
    src_img = transform(image=src_img)["image"]

    img2predict = cv2.cvtColor(src_img.copy(),
                               cv2.COLOR_BGR2RGB).astype(dtype=np.float32)
    img2predict = normalize(img2predict)
    img2predict = utils.to_gpu(
        numpy_to_tensor(img2predict).unsqueeze(0).contiguous()).float()

    with torch.no_grad():
        if len(models) == 1:
            # Probabilities of the single model, in evaluation mode.
            model = models[0].eval()
            predict = torch.sigmoid(model(img2predict))
            predict = predict.squeeze(0).squeeze(0)
        else:
            # Average the probabilities of all models for this image.
            sum_predicts = utils.to_gpu(
                torch.zeros(
                    (1, 1, src_img.shape[0], src_img.shape[1])).float())
            for model in models:
                model.eval()
                sum_predicts += torch.sigmoid(model(img2predict))
            predict = (sum_predicts /
                       len(models)).squeeze(0).squeeze(0).float()

    mask = (predict > thresh).cpu().numpy().astype(dtype=np.uint8)
    overlayed_img = alpha_overlay(src_img, mask)

    # str() so that a pathlib.Path destination is accepted as well.
    cv2.imwrite(str(path2save), overlayed_img)

    cv2.imshow("Predicted", overlayed_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    print("Image '{}' was processed successfully.".format(str(img_path)))
def _get_sos_batch(self, bsz, vocab):
    """Return a ``(bsz, 1)`` long tensor filled with ``vocab.SOS_ID``.

    The tensor is wrapped in ``Variable`` and passed through ``to_gpu``
    before being filled in place.
    """
    placeholder = Variable(torch.ones(bsz, 1).long())
    sos_column = to_gpu(self.cfg.cuda, placeholder)
    sos_column.fill_(vocab.SOS_ID)
    return sos_column
def _add_noise_to(self, code, std):
    """Add zero-mean Gaussian noise with standard deviation ``std`` to ``code``.

    Args:
        code: tensor to perturb.
        std: noise standard deviation; unless it is strictly positive,
            ``code`` is returned unchanged.

    Returns:
        ``code`` itself, or a new tensor ``code + noise``.
    """
    if not (std > 0):
        return code

    zero_mean = torch.zeros(code.size())
    perturbation = to_gpu(self.cfg.cuda,
                          Variable(torch.normal(mean=zero_mean, std=std)))
    return code + perturbation
def evaluate(path2models, model: nn.Module, threshold: float, holdout_dataset: str, evaluate_one=True):
    """
    Perform evaluation of a model or list of models.

    Params:
        path2models     : directory holding 'model.pt' (single model), or a
                          directory of per-fold sub-directories each holding
                          'model.pt' (cross-validation case)
        model           : A model. Can be RekNetM1, etc. Its weights are
                          overwritten by the loaded checkpoint(s).
        threshold       : probability threshold, must be in [0, 1)
        holdout_dataset : path to a hold-out dataset (must contain 'imgs' and
                          'masks' subdirs)
        evaluate_one    : if True then perform evaluation of one model.
                          Otherwise the sigmoid outputs of all fold models are
                          averaged per sample.

    Returns:
        ({"eval_jacc": ..., "eval_dice": ...}, uu_metrics, um_metrics,
        umm_metrics)

    Raises:
        RuntimeError : if a 'model.pt' file is missing, or no fold directory
                       is found in the cross-validation case
    """
    import copy  # local import: only needed to snapshot per-fold models

    assert threshold >= 0 and threshold < 1.0, "Error. Invalid threshold: {}".format(threshold)

    path2models = Path(path2models)
    holdout_dataset = Path(holdout_dataset)

    # glob() yields entries in arbitrary order; sort both listings so the
    # image and mask lists are deterministic.
    # NOTE(review): assumes RoadDataset2 pairs entries by position or by
    # name -- sorting is harmless either way; confirm.
    img_paths = sorted(map(str, (holdout_dataset / 'imgs').glob('*')))
    mask_paths = sorted(map(str, (holdout_dataset / 'masks').glob('*')))

    test_dataset = RoadDataset2(img_paths=img_paths, mask_paths=mask_paths, transforms=valid_tranformations())
    fmax_test_datset = RoadDataset2(img_paths=img_paths, mask_paths=mask_paths, transforms=valid_tranformations(), fmeasure_eval=True)

    if evaluate_one:
        path2models = path2models / 'model.pt'
        print("Evluation for the single model: {}".format(str(path2models)))

        if not path2models.exists():
            raise RuntimeError("Model {} does not exists.".format(str(path2models)))

        state = torch.load(str(path2models))
        model.load_state_dict(state["model"])
        model.eval()
        models_list = [model]
    else:
        # Important! path2models should contain a few sub-directories, each
        # holding the model trained on one fold.
        list_models_paths = sorted(path2models.glob('*'))
        print("Evaluation for multiple models: {}".format([str(lmp/'model.pt') for lmp in list_models_paths]))

        models_list = []
        for lmp in list_models_paths:
            model_path = lmp / 'model.pt'
            if not model_path.exists():
                raise RuntimeError("Model {} does not exists.".format(str(model_path)))

            state = torch.load(str(model_path))
            model.load_state_dict(state["model"])
            model.eval()
            # Snapshot the loaded weights. Appending ``model`` itself would
            # make every list entry alias one object holding only the last
            # fold's weights.
            models_list.append(copy.deepcopy(model))

        if not models_list:
            raise RuntimeError("No models found in {}.".format(str(path2models)))

    # metrics lists
    jaccards = []
    dices = []

    for img, mask in test_dataset:
        img = to_gpu(img.unsqueeze(0).contiguous())
        mask = to_gpu(mask.unsqueeze(0).contiguous())

        with torch.set_grad_enabled(False):
            if evaluate_one:
                predict = torch.sigmoid(model(img))
            else:
                # Average the probabilities of all fold models.
                sum_predicts = to_gpu(torch.zeros(mask.shape).float())
                for m in models_list:
                    sum_predicts += torch.sigmoid(m(img))
                predict = (sum_predicts / len(models_list)).float()

        binary = (predict > threshold).float()
        jaccards.append(jaccard(mask, binary))
        dices.append(dice(mask, binary))

    evaluation_jaccard = np.mean(jaccards).astype(dtype=np.float64)
    evaluation_dice = np.mean(dices).astype(dtype=np.float64)

    uu_metrics, um_metrics, umm_metrics = fmeasure_evaluation(models_list, valid_dataset=fmax_test_datset)

    return {"eval_jacc" : evaluation_jaccard, "eval_dice" : evaluation_dice}, uu_metrics, um_metrics, umm_metrics
def train(num_gpus, rank, group_name, output_directory, epochs,
          g_learning_rate, d_learning_rate, adv_ag, adv_fd, lamda_adv,
          lamda_feat, warmup_steps, decay_learning_rate, iters_per_checkpoint,
          batch_size, seed, checkpoint_path):
    """Train the encoder/generator against the discriminator and disentangler.

    Each step first updates the discriminator side (multi-scale discriminator
    when ``adv_ag`` is on, disentangler when ``adv_fd`` is on) and then the
    generator side (encoder + generator) with a multi-resolution STFT loss
    plus the enabled adversarial terms.

    Args:
        num_gpus: distributed setup is initialised when greater than 1.
        rank: process rank; logging and checkpointing happen on rank 0.
        group_name: forwarded to ``init_distributed``.
        output_directory: receives logs and ``model_<iteration>`` checkpoints.
        epochs: upper bound of the epoch loop.
        g_learning_rate, d_learning_rate: learning rates of the two RAdam
            optimizers.
        adv_ag: enables adversarial training for audio generation.
        adv_fd: enables adversarial training for feature disentanglement.
        lamda_adv, lamda_feat: weights of the adversarial and feature-matching
            terms in the generator loss.
        warmup_steps, decay_learning_rate: forwarded to ``Optimizer``.
        iters_per_checkpoint: checkpoint period in iterations.
        batch_size: data-loader batch size.
        seed: seed for the torch CPU and CUDA generators.
        checkpoint_path: checkpoint to resume from; ``""`` starts fresh.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed source -- in particular which statements sit under
    ``if rank == 0`` should be confirmed against the original file.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    # All sub-networks hang off one container module so they can be moved,
    # checkpointed and switched to train mode together.
    model = torch.nn.Module()
    model.add_module('encoder', Encoder(**encoder_config))
    model.add_module('generator',
                     Generator(sum(encoder_config['n_out_channels'])))
    model.add_module('discriminator',
                     MultiScaleDiscriminator(**discriminator_config))
    # The disentangler maps the first encoder output group to the remaining
    # groups (input width = first entry, output width = sum of the rest).
    model.add_module(
        'disentangler',
        Disentangler(encoder_config['n_out_channels'][0],
                     sum(encoder_config['n_out_channels'][1:])))

    model = model.cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    # Using RAdam as optimizer
    # Lookahead has resume training issues:
    # lr schedule doesn't affect nested RAdam of Lookahead
    # "g" side = encoder + generator parameters.
    g_parameters = list(model.generator.parameters())
    g_parameters = list(model.encoder.parameters()) + g_parameters
    g_optimizer = RAdam(g_parameters, lr=g_learning_rate)

    # "d" side = disentangler + discriminator parameters.
    d_parameters = list(model.discriminator.parameters())
    d_parameters = list(model.disentangler.parameters()) + d_parameters
    d_optimizer = RAdam(d_parameters, lr=d_learning_rate)

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, g_optimizer, d_optimizer, iteration = load_checkpoint(
            checkpoint_path, model, g_optimizer, d_optimizer)

        iteration += 1  # next iteration is iteration + 1

    # Wrappers that step the optimizer and update its learning rate.
    customer_g_optimizer = Optimizer(g_optimizer, g_learning_rate, iteration,
                                     warmup_steps, decay_learning_rate)
    customer_d_optimizer = Optimizer(d_optimizer, d_learning_rate, iteration,
                                     warmup_steps, decay_learning_rate)

    criterion = nn.MSELoss()
    l1_loss = nn.L1Loss()
    stft_criterion = MultiResolutionSTFTLoss()

    trainset = Dataset(**data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    # Shuffling is delegated to the sampler when one is used.
    train_loader = DataLoader(trainset,
                              num_workers=1,
                              shuffle=(train_sampler is None),
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

        # One time-stamped log directory per run.
        logdir = os.path.join(
            output_directory,
            time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime()))
        os.makedirs(logdir, exist_ok=True)
        writer = SummaryWriter(logdir=logdir)

        # One meter per loss key that can appear in ``losses`` below.
        anchors = [
            'loss_g', 'loss_g_sc', 'loss_g_mag', 'loss_g_adv', 'loss_g_feat',
            'loss_g_fd', 'loss_d', 'loss_d_real', 'loss_d_fake', 'loss_d_fd'
        ]
        meters = {
            x: LossMeter(x, writer, 100, iteration, True)
            for x in anchors
        }

    model.train()
    # When resuming, skip the epochs already covered by ``iteration``.
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        train_sampler.set_epoch(epoch) if train_sampler is not None else None
        # Progress bar on rank 0 only.
        tbar = tqdm(
            enumerate(train_loader)) if rank == 0 else enumerate(train_loader)
        for i, batch in tbar:
            model.zero_grad()

            cond, a = [to_gpu(x) for x in batch]

            # Get generator outputs
            x = model.encoder(cond)
            g_outputs = model.generator(x)

            losses = {}

            # Get Discriminator loss
            customer_d_optimizer.zero_grad()
            d_loss = []

            # Adversarial training for audio generation
            if adv_ag == True:
                real_scores, _ = model.discriminator(a.unsqueeze(1))
                # detach(): the discriminator update must not reach the
                # generator through the fake samples.
                fake_scores, _ = model.discriminator(g_outputs.detach())

                d_loss_fake_list, d_loss_real_list = [], []
                # Least-squares targets: real -> 1, fake -> 0.
                for (real_score, fake_score) in zip(real_scores, fake_scores):
                    d_loss_real_list.append(
                        criterion(real_score, torch.ones_like(real_score)))
                    d_loss_fake_list.append(
                        criterion(fake_score, torch.zeros_like(fake_score)))

                d_loss_real = sum(d_loss_real_list) / len(d_loss_real_list)
                d_loss_fake = sum(d_loss_fake_list) / len(d_loss_fake_list)
                d_loss = d_loss + [d_loss_real, d_loss_fake]

                losses.update({
                    'loss_d_real': d_loss_real,
                    'loss_d_fake': d_loss_fake
                })

            # Adversarial training for feature disentanglement
            if adv_fd == True:
                # The disentangler learns to predict the remaining feature
                # groups from the first one; the encoder output is detached.
                split_x = torch.split(x.detach(),
                                      encoder_config['n_out_channels'],
                                      dim=1)
                pred = model.disentangler(split_x[0])
                d_loss_fd = F.l1_loss(pred, torch.cat((split_x[1:]), dim=1))
                d_loss = d_loss + [d_loss_fd]

                losses.update({'loss_d_fd': d_loss_fd})

            # Skip the "d" step entirely when neither adversarial term is on.
            if len(d_loss) > 0:
                d_loss = sum(d_loss)
                d_loss.backward()
                nn.utils.clip_grad_norm_(d_parameters, max_norm=10)
                customer_d_optimizer.step_and_update_lr()

                losses.update({'loss_d': d_loss})

            # Get generator loss
            customer_g_optimizer.zero_grad()
            g_clip_norm_scale = 10

            # STFT Loss
            sc_loss, mag_loss = stft_criterion(g_outputs.squeeze(1), a)
            g_loss = sc_loss + mag_loss

            losses.update({'loss_g_sc': sc_loss, 'loss_g_mag': mag_loss})

            # Adversarial training for audio generation
            if adv_ag == True:
                fake_scores, fake_feats = model.discriminator(g_outputs)
                real_scores, real_feats = model.discriminator(a.unsqueeze(1))

                adv_loss_list, feat_loss_list = [], []
                # The generator wants fakes scored as real (target 1).
                # NOTE: ``i`` here rebinds the batch index of the outer loop.
                for i, fake_score in enumerate(fake_scores):
                    adv_loss_list.append(
                        criterion(fake_score, torch.ones_like(fake_score)))
                adv_loss = sum(adv_loss_list) / len(adv_loss_list)

                # Feature matching: L1 between fake and (detached) real
                # intermediate discriminator features.
                for i in range(len(fake_feats)):
                    for j in range(len(fake_feats[i])):
                        feat_loss_list.append(
                            l1_loss(fake_feats[i][j],
                                    real_feats[i][j].detach()))
                feat_loss = sum(feat_loss_list) / len(feat_loss_list)

                g_loss = g_loss + adv_loss * lamda_adv + feat_loss * lamda_feat

                losses.update({'loss_g_adv': adv_loss})
                losses.update({'loss_g_feat': feat_loss})
                # A tighter gradient clip is used once adversarial terms are
                # part of the generator loss.
                g_clip_norm_scale = 0.5

            # Adversarial training for feature disentanglement
            if adv_fd == True:
                split_x = torch.split(x,
                                      encoder_config['n_out_channels'],
                                      dim=1)
                pred = model.disentangler(split_x[0])
                g_loss_fd = F.l1_loss(pred,
                                      torch.cat((split_x[1:]), dim=1).detach())
                # Negative sign: the encoder is rewarded when the
                # disentangler's prediction error grows.
                g_loss = g_loss + (-1.0) * g_loss_fd

                losses.update({'loss_g_fd': g_loss_fd})

            g_loss.backward()
            nn.utils.clip_grad_norm_(g_parameters, max_norm=g_clip_norm_scale)
            customer_g_optimizer.step_and_update_lr()

            losses.update({'loss_g': g_loss})

            # only output log of 0-th GPU
            if rank == 0:
                # x[5:] strips the leading "loss_" from each key.
                tbar.set_description("{:>7}: ".format(iteration) + ', '.join([
                    "{}: {:.1e}".format(x[5:], losses[x].item())
                    for x in losses.keys()
                ]))

                for x in losses:
                    meters[x].add(losses[x].item())

                if (iteration % iters_per_checkpoint == 0):
                    checkpoint_path = "{}/model_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, g_optimizer, d_optimizer,
                                    iteration, checkpoint_path)

            iteration += 1
def _get_tag_batch(self, size, num):
    """Return a tensor of shape ``(*size, 1)`` in which every entry is ``num``.

    The ones tensor is wrapped in ``Variable`` and passed through ``to_gpu``
    before being multiplied by ``num``.
    """
    ones = Variable(torch.ones(*size, 1))
    return to_gpu(self.cfg.cuda, ones) * num
def _add_gaussian_noise_to(self, code):
    """Return ``code`` plus zero-mean Gaussian noise.

    The noise has the same size as ``code`` and standard deviation
    ``self.noise_radius``; it is passed through ``to_gpu`` before the
    addition.
    """
    zero_mean = torch.zeros(code.size())
    perturbation = torch.normal(mean=zero_mean, std=self.noise_radius)
    return code + to_gpu(self.cfg.cuda, Variable(perturbation))
def main():
    """Generate images from a saved generator, or train a DCGAN / SNGAN.

    Behaviour is driven by the dictionary returned from ``parseyaml()``:

    * ``arch == 'Generator'``: load generator weights from ``params['path']``
      and write ``params['quantity']`` batches of 64 images to
      ``<out><run>_<batch>_<index>_img.png``.
    * ``arch in ('DCGAN', 'SNGAN')``: build generator and discriminator for
      the configured image size, train them with ``training_loop`` and write
      the loss plot, image grid and metrics under the ``<out><run>_`` prefix.

    Raises:
        ValueError: if ``image_size`` is not 64, 128 or 256, or ``arch`` is
            not one of 'Generator', 'DCGAN', 'SNGAN'.  (Previously these
            cases surfaced later as an unbound ``netG`` / ``netD``.)
    """
    params = parseyaml()
    arch = params['arch']
    image_size = params['image_size']
    out_prefix = params['out'] + params['run'] + '_'

    generators = {64: Generator, 128: Generator_128, 256: Generator_256}
    if image_size not in generators:
        raise ValueError(
            "Unsupported image_size: {!r} (expected one of {})".format(
                image_size, sorted(generators)))
    generator_cls = generators[image_size]

    if arch == 'Generator':
        device = to_gpu(ngpu=params['n_gpu'])
        netG = generator_cls(ngpu=0, nz=256, ngf=64, nc=64).to(device)
        netG.apply(weights_init)
        netG.load_state_dict(torch.load(params['path']))

        # Pure sampling: no autograd graph is needed.
        with torch.no_grad():
            for i in range(params['quantity']):
                fixed_noise = torch.randn(64, 256, 1, 1, device=device)
                fakes = netG(fixed_noise)
                for j in range(len(fakes)):
                    save_image(
                        fakes[j],
                        out_prefix + str(i) + '_' + str(j) + '_img.png')
        return

    discriminators = {
        'DCGAN': {64: Discriminator,
                  128: Discriminator_128,
                  256: Discriminator_256},
        'SNGAN': {64: Discriminator_SN,
                  128: Discriminator_SN_128,
                  256: Discriminator_SN_256},
    }
    if arch not in discriminators:
        raise ValueError(
            "Unsupported arch: {!r} (expected 'Generator', 'DCGAN' or "
            "'SNGAN')".format(arch))
    discriminator_cls = discriminators[arch][image_size]

    dataloader = dataLoader(
        path=params['path'], image_size=params['image_size'],
        batch_size=params['batch_size'], workers=params['loader_workers'])
    device = to_gpu(ngpu=params['n_gpu'])

    # Generator first, then discriminator (keeps the construction order).
    netG = generator_cls(ngpu=params['n_gpu'], nz=params['latent_vector'],
                         ngf=params['gen_feature_maps'],
                         nc=params['number_channels']).to(device)
    netD = discriminator_cls(params['n_gpu'], nc=params['number_channels'],
                             ndf=params['dis_feature_maps']).to(device)

    if (device.type == 'cuda') and (params['n_gpu'] > 1):
        gpu_ids = list(range(params['n_gpu']))
        netG = nn.DataParallel(netG, gpu_ids)
        netD = nn.DataParallel(netD, gpu_ids)

    netG.apply(weights_init)
    netD.apply(weights_init)

    print(netG)
    print(netD)

    criterion = nn.BCELoss()

    # NOTE(review): the number of fixed noise vectors is taken from
    # ``image_size``, not from a batch-size setting -- kept as is; confirm
    # this is intended.
    fixed_noise = torch.randn(params['image_size'],
                              params['latent_vector'], 1, 1, device=device)

    # A learning_rate >= 1 is treated as a multiplier applied to the
    # discriminator's base rate of 2e-4 (the generator keeps 2e-4);
    # otherwise it is the absolute rate for both networks.
    betas = (params['beta_adam'], 0.999)
    if params['learning_rate'] >= 1:
        lr_d = 0.0002 * params['learning_rate']
        lr_g = 0.0002
    else:
        lr_d = lr_g = params['learning_rate']
    optimizerD = optim.Adam(netD.parameters(), lr=lr_d, betas=betas)
    optimizerG = optim.Adam(netG.parameters(), lr=lr_g, betas=betas)

    G_losses, D_losses, img_list, img_list_only = training_loop(
        num_epochs=params['num_epochs'], dataloader=dataloader,
        netG=netG, netD=netD, device=device, criterion=criterion,
        nz=params['latent_vector'],
        optimizerG=optimizerG, optimizerD=optimizerD,
        fixed_noise=fixed_noise, out=out_prefix)

    loss_plot(G_losses=G_losses, D_losses=D_losses, out=out_prefix)

    image_grid(dataloader=dataloader, img_list=img_list,
               device=device, out=out_prefix)

    compute_metrics(real=next(iter(dataloader)), fakes=img_list_only,
                    size=params['image_size'], out=out_prefix)